*******************************************************************************
*   Title: appendix_final
* Purpose: Removing extraneous code/comments and noting any questions
*   Owner: AN
*    Date: 2022-07-19
*   Notes: This file draws on appendix_20210225.do and appendix_final.do
*******************************************************************************


/*******************************************************************************
    Figures/*%<*/
*******************************************************************************/
capture program drop graph_fiscal_shares/*%<*/
program define graph_fiscal_shares

    /******************************************************************************
        Appendix Figure 3: Income Shares of Major Components of Fiscal Income, 
        Reranked by Fiscal Income Component

        Loads the fiscalflows analysis data, exports it to CSV, and then draws
        one panel (3a-3h) per income category. Each panel plots the share of
        that category accruing to the top 10%, 1%, 0.1% and 0.01% groups.
    ******************************************************************************/

    load_analysis_data fiscalflows

    * Keep a CSV copy of the loaded data next to the figures
    export delim using wealth_figure3_v4.csv, replace

    /***************************************************************************
         Lookup tables: the i-th income category is drawn with the i-th y-axis 
            label (lab1-lab8) and exported as the i-th panel letter.
         Note that divs, kgs, interest include estate distributed income flows.
         Note that pension income was removed from fiwag (fiscal inc, wages, 
            pens), so wages and pensions are separate panels.
    ***************************************************************************/

    local categories fixinc realestatetax dividendinc capitalginc scorppship schcincp wagind peninc
    local panels a b c d e f g h

    local lab1 "Taxable Interest"
    local lab2 "Property Tax"
    local lab3 "Dividends"
    local lab4 "Capital Gains"
    local lab5 "S-corp + Partnership Income"
    local lab6 "Sole Proprietorship Income"
    local lab7 "Wages"
    local lab8 "Pension Income"

    /***************************************************************************
         One graph per income category
    ***************************************************************************/

    local ncat : word count `categories'

    forvalues i = 1/`ncat' {

        local inccat : word `i' of `categories'
        local subfig : word `i' of `panels'
        local ytitlab "`lab`i''"

        twoway (scatter sh_`inccat'_top10 year, connect(l) msymbol(X) lcolor("$u4") mcolor("$u4")) ///
            (scatter sh_`inccat'_top1 year, connect(l) msymbol(d) lcolor("$u3") mcolor("$u3")) ///
            (scatter sh_`inccat'_top01 year, connect(l) msymbol(o) lcolor("$u1") mcolor("$u1")) ///
            (scatter sh_`inccat'_top001 year, connect(l) msymbol(t) lcolor("$p2") mcolor("$p2")) ///
            if year > 1965, ///
            ytitle("Share of `ytitlab' (%)") xtitle(" ") ///
            xlabel(1965(10)2015) ylabel(0(20)100) ///
            graphregion(lcolor(white) fcolor(white)) plotregion(color(white)) ///
            legend(label(1 "Top 10%") label(2 "Top 1%") label(3 "Top 0.1%") ///
                label(4 "Top 0.01%") order(1 2 3 4) rows(1) region(lcolor(white))) ///
            xsize(7.5)

        graph export wealth_figure3`subfig'_v4.pdf, replace
    }

end/*%>*/

capture program drop aggwealth_extended/*%<*/
program define aggwealth_extended
/*******************************************************************************
    Appendix Figure 20
    (AE) Aggregate Wealth Extended Back to 1913, earliest year in PSZ estimates

    Inputs:  parameters.xlsx and PSZ2020AppendixTablesI(Aggreg).xlsx (sheet 
             TA1) in the $inputs folder, the natinc analysis data, and 
             aggwealth.csv in the working directory
    Output:  aggwealth_extended.pdf

    NOTE: aggwealth.csv is written by graph_aggregate_fiscal_wealth, so that
    program has to be run before this one.
*******************************************************************************/

    /***************************************************************************
         Load SZ 2020 parameters aggregates extending back to 1913 from 
            1945; save as tempfile
    ***************************************************************************/

    * Path is quoted so that an $inputs folder containing spaces still works
    import excel using "$inputs/parameters.xlsx", ///
        firstrow clear

    keep if inrange(yr, 1913, 1945)
    rename yr year

    keep year ttdivw ttintexmw ttinttaxw ttmmbondfund ttmiscw ttschcpartw ttscorw ///
        ttrentw ttmortw ttrestw ttpeniraw ttpenw ttcurrency

    * Build the same broad wealth categories that are plotted below
    gen ttbus = ttschcpartw + ttscorw
    gen ttpen = ttpenw + ttpeniraw
    gen tthou = ttrestw + ttmortw + ttrentw 

    gen ttfix = ttcurrency + ttinttaxw + ttintexmw + ttmmbondfund

    tempfile sz2020parameters
    save `sz2020parameters'

    /***************************************************************************
        Load PSZ national income series from SZ 2020 macro appendix;
            national income series from FRED only extends back to 1929, but 
            need to go back to 1913
    ***************************************************************************/

    import excel using "$inputs/PSZ2020AppendixTablesI(Aggreg).xlsx", ///
        sheet("TA1") cellrange(A8:B115) clear

    rename (A B) (year y_sz20)

    replace y_sz20 = y_sz20 * 1E9 // Scale out of billions into dollars

    tempfile sz2020natlincome
    save `sz2020natlincome'

    /***************************************************************************
         Load new version of wealth aggregates, constructed from 2020Q3 
            release of parameters mostly following SZ 2020. No duplicates
    ***************************************************************************/

    tempfile natinc
    load_analysis_data natinc
    save `natinc'

    * Aggregates exported by graph_aggregate_fiscal_wealth, plus the 
    * 1913-1945 rows prepared above
    import delimited using aggwealth.csv, clear

    append using `sz2020parameters'

    /***************************************************************************
         Normalize pre-1946 values by national income
    ***************************************************************************/

        /***********************************************************************
             Merge on national income sources, ensure everything's 
                kosher
        ***********************************************************************/

    merge 1:1 year using `natinc', update /* assert(1 3 4) */
    drop _merge

    merge 1:1 year using `sz2020natlincome',  assert(2 3)  keep(3) nogen

    * The two national income series must agree within 1% where both exist
    gen checkratio = y / y_sz20
    assert abs(1 - checkratio) < 1E-2 if year >= 1929

    * Before 1929 only the PSZ series is available
    replace y = y_sz20 if year < 1929
    drop y_sz20

        /***********************************************************************
             Divide wealth aggregates by national income
        ***********************************************************************/

    * Parameters aggregates are in millions; shares are in percent. Only the
    * pre-1946 rows are filled in, and they must be empty beforehand.
    foreach millions of varlist ttdivw ttbus ttpen ttfix tthou {
        assert missing(`millions'_share_y) if year < 1946
        replace `millions'_share_y = (`millions' * 1E6 / y) * 100 if year < 1946
    }

    /***************************************************************************
        Make figure
    ***************************************************************************/

    sort year
    #delimit ;
    twoway (connect ttfix_share_y year, ms(o) color("$u1")) 
       (connect tthou_share_y year, ms(x) mcolor("$p2") lcolor("$p2")) 
       (connect ttpen_share_y year, ms(t) color("$f4")) 
       (connect ttpen_preferred_share_y year, ms(th) color("$f4") lpattern(-)) 
       (connect ttdivw_share_y year, ms(d) color("$u3")) 
       (connect ttbus_preferred_share_y year, ms(s) color("$f3"))
       (connect ttbus_supple_share_y year, ms(sh) color("$f3") lpattern(-))
       , 
       ytitle("Share of National Income (%)") xtitle(" ") 
       xlab(1910(10)2020) ylab(0(50)200)
       graphregion(lcolor(white) fcolor(white)) plotregion(color(white)) 
       legend(order(4 "Pensions (Inc. Unfunded DB)" 
                    7 "Pass-through (Supplemental)" 1 "Fixed Income"  
                    3 "Pensions (Baseline)"
                    6 "Pass-through (Baseline)" 5 "C-corporation Wealth"
                    2 "Housing (Net of Mortgages)") 
              region(lcolor(white) margin(tiny)) row(3)) xsize(8);
    #delimit cr
    graph export aggwealth_extended.pdf, replace

end/*%>*/

capture program drop graph_aggregate_fiscal_wealth/*%<*/
program define graph_aggregate_fiscal_wealth

    /***************************************************************************
        Appendix Figures 19 and 21
        Aggregate Household Wealth

        Inputs:  natinc, parameters_new, parameters_older and szz analysis 
                 data; rerank_sums_20200228_nocount.csv in the $inputs folder
        Outputs: aggwealth.csv, aggwealth.pdf, aggwealth_movingtargets.pdf,
                 aggfiscalincome.csv, aggfiscalincome.pdf

        NOTE: aggwealth.csv is read back in by aggwealth_extended.
    ***************************************************************************/
       
    /***********************************************************************
         Load the national income series and keep it in a tempfile for the
            merges below
    ***********************************************************************/
    load_analysis_data natinc
    tempfile natinc 
    save `natinc'

    /***********************************************************************
         Prepare wealth aggregates reconstructed to match SZ 2020 
            parameters (plus parameters from PSZ for appendix)
    ***********************************************************************/
    load_analysis_data parameters_new
    tempfile sz2020expanded
    save `sz2020expanded'

    * Older vintage of the same aggregates; suffixed _old so both vintages
    * can sit side by side after the merge
    load_analysis_data parameters_older
    tempfile psz2018
    keep year ttbus ttpen tthou ttfix ttdivw
    rename (tt*) (tt*_old)
    save `psz2018'

    /***********************************************************************
         Load series we don't construct to match SZ 2020 aggregates:
            pensions and private business wealth
    ***********************************************************************/
    load_analysis_data szz
    keep if group == "All"
    isid year

    assert inrange(year, 1966, 2016)

    gen ttbus_preferred = hwbus_base - (0.2 * ccorw_base)
    rename hwpen_pref ttpen_preferred

    assert !missing(ttpen_preferred) & !missing(ttbus_preferred) 

    gen ttbus_supple = hwbus_pref - (0.2 * ccorw_pref)

    keep year tt*_preferred tt*_supple nonmort* hweal_preferred hweal20

    tempfile preferred_bus_pen
    save `preferred_bus_pen'

    /***********************************************************************
         Merge together different files
    ***********************************************************************/
    use `sz2020expanded', clear
    merge 1:1 year using `preferred_bus_pen', keep(1 3) nogen
    merge 1:1 year using `psz2018', keep(1 3) nogen
    
    * Only years with a national income value survive this merge
    merge 1:1 year using `natinc', keep(3) nogen

    * Parameters aggregates are in millions
    foreach millions of varlist ttdivw ttbus ttpen ttfix tthou ttpen_preferred_check {
        gen `millions'_share_y = (`millions' * 1E6 / y) * 100
    }

    foreach millions of varlist ttdivw_old ttbus_old ttpen_old ttfix_old tthou_old {
        gen `millions'_share_y = (`millions' * 1E6 / y) * 100
    }

    * These series are not rescaled by 1E6 before dividing by y
    foreach dollars of varlist ttpen_preferred ttbus_preferred ttbus_supple nonmort* hweal_preferred hweal20 {
        gen `dollars'_share_y = (`dollars' / y) * 100
    }

    * The two pension series must agree within 1% over 1966-2016
    assert inrange(ttpen_preferred_share_y / ttpen_preferred_check_share_y, 0.99, 1.01) ///
        if inrange(year, 1966, 2016)
    drop ttpen_preferred_check*

    /***********************************************************************
         Plot series
        Notes to self: aggregates use parameters new. see DS memo on
        Reconstructing SZ2020 for how these change. 
        - ttdivw is lower than 04/2020 because SZ S-corp estimate is larger and
          because money market funds are no longer allocated to this bucket
        - ttbus is higher because ttscorww is higher and because ttschcpartw has
          less mortgages than before (these go over to rental housing)
        - ttfix no longer includes miscw
    ***********************************************************************/
    * Exported before the year restriction below, so the CSV keeps every 
    * merged year
    export delim aggwealth.csv, replace

    keep if year > 1965 & year <= 2016

    sort year
    #delimit ;
    twoway (connect ttfix_share_y year, ms(o) mcolor("$u1") lcolor("$u1")) 
       (connect tthou_share_y year, ms(x) mcolor("$p2") lcolor("$p2")) 
       (connect ttpen_share_y year, ms(t) mc("$f4") lc("$f4")) 
       (connect ttpen_preferred_share_y year, ms(th) mc("$f4") lc("$f4") lpattern(-)) 
       (connect ttdivw_share_y year, ms(d) mcolor("$u3") lcolor("$u3")) 
       (connect ttbus_preferred_share_y year, ms(s) mcolor("$f3") lcolor("$f3"))
       (connect ttbus_supple_share_y year, ms(sh) mcolor("$f3") lcolor("$f3") lpattern(-))
       , 
       ytitle("Share of National Income (%)") xtitle(" ") 
       xlab(1965(5)2016) ylab(0(50)200)
       graphregion(lcolor(white) fcolor(white)) plotregion(color(white)) 
       legend(order(4 "Pensions (Inc. Unfunded DB)" 
                    7 "Pass-through (Supplemental)" 1 "Fixed Income"  
                    3 "Pensions (Baseline)"
                    6 "Pass-through (Baseline)" 5 "C-corporation Wealth"
                    2 "Housing (Net of Mortgages)") 
              region(lcolor(white) margin(tiny)) row(3)) xsize(8);
    #delimit cr
    graph export aggwealth.pdf, replace

    * Same categories, comparing the new aggregates against the _old vintage
    #delimit ;
    twoway 
       (connect ttfix_share_y year, ms(o) mcolor("$u1") lcolor("$u1")) 
       (connect ttfix_old_share_y year, ms(oh) mcolor("$u1") lcolor("$u1")) 
       (connect tthou_share_y year, ms(x) mcolor("$p2") lcolor("$p2")) 
       (connect tthou_old_share_y year, ms(x) lp(-) mcolor("$p2") lcolor("$p2")) 
       (connect ttpen_share_y year, ms(t) mc("$f4") lc("$f4")) 
       (connect ttpen_old_share_y year, ms(th) mc("$f4") lc("$f4")) 
       (connect ttdivw_share_y year, ms(d) mcolor("$u3") lcolor("$u3")) 
       (connect ttdivw_old_share_y year, ms(dh) mcolor("$u3") lcolor("$u3")) 
       (connect ttbus_share_y year, ms(s) mcolor("$f3") lcolor("$f3"))
       (connect ttbus_old_share_y year, ms(sh) mcolor("$f3") lcolor("$f3"))
       , 
       ytitle("Share of National Income (%)") xtitle(" ") 
       xlab(1965(5)2016) ylab(0(50)200)
       graphregion(lcolor(white) fcolor(white)) plotregion(color(white)) 
       legend(order(6 "Pensions (PSZ 18)" 
                    5 "Pensions (SZ 20)" 
                    2 "Fixed Income (PSZ 18)"  
                    1 "Fixed Income (SZ 20)"  
                    4 "Housing (Net of Mtg., PSZ 18)"
                    3 "Housing (Net of Mtg., SZ 20)"
                    8 "C-corporation Wealth (PSZ 18)"
                    7 "C-corporation Wealth (SZ 20)"
                    10 "Pass-through (PSZ 18)" 
                    9 "Pass-through (SZ 20)") 
              region(lcolor(white) margin(tiny)) col(2)) xsize(8);
    #delimit cr
    graph export aggwealth_movingtargets.pdf, replace


    /***************************************************************************
         Aggregate Components of Fiscal Income
    ***************************************************************************/
    
    /***********************************************************************
         Load in revised real estate numbers
    ***********************************************************************/
    tempfile realestate
    * Path is quoted so that an $inputs folder containing spaces still works
    import delimit "$inputs/rerank_sums_20200228_nocount.csv", clear
    * Rows with w_group == 0 are relabelled as the "All" group
    keep if w_group == 0
    drop w_group
    gen group = "All"
    save `realestate'

    /***********************************************************************
         Load aggregate tax data from the szz analysis data
    ************************************************************************/
    load_analysis_data szz
    * Only the "All" group is needed for this figure
    drop if group != "All"

    keep year group inc_fix inc_scorp inc_div inc_kg inc_bus inc_pen

    * Every year must be present on both sides
    merge 1:1 year using `realestate', assert(3) nogen

    order year
                
    /***********************************************************************
         Merge to national income series
    ***********************************************************************/
    merge 1:1 year using `natinc', keep(3) nogen

    /***********************************************************************
         Compute components share of national income
    ***********************************************************************/
    foreach inccat of varlist inc* realestatetax {
        gen `inccat'_share_y = (`inccat' / y) * 100 
    }

    * Pass-through = S-corporation income + business income
    gen inc_passthrough_share_y = inc_scorp_share_y + inc_bus_share_y

    keep year *_share_y

    /***********************************************************************
        Make graphs
    ***********************************************************************/
    export delim using aggfiscalincome.csv, replace
    #delimit ;
    twoway (connect inc_fix_share_y year, ms(o) lc("$u1") mc("$u1")) 
        (connect inc_div_share_y year, ms(d) lc("$u3") mc("$u3")) 
        (connect inc_kg_share_y year, ms(d) lc("$u3") mc("$u3") lpattern(-)) 
        (connect inc_pen_share_y year, ms(t) mc("$f4") lc("$f4")) 
        (connect inc_passthrough_share_y year, ms(s) lc("$f3") mc("$f3")) 
        (connect realestatetax_share_y year, ms(x) lc("$p2") mc("$p2"))
        ,
        $gpr
        xlab(1965(5)2016) ylab(0(1)9)
        ytitle("Share of National Income (%)") xtitle(" ")
        legend(order(2 "Dividends" 3 "Capital Gains" 4 "Pension Distributions"
                     1 "Taxable Interest" 5 "Pass-through Business" 6 "Property Tax")
               row(2) region(lcolor(white)))
        xsize(8);
    #delimit cr
    graph export aggfiscalincome.pdf, replace

end/*%>*/

capture program drop sz16fig4Breplication/*%<*/
program define sz16fig4Breplication
/*******************************************************************************
    Appendix Figure 32
    (SZ4B): Top Wealth Shares vs. Capitalized Income Shares in SCF (replicating 
        SZ 2016 figure 4B). Code and data for this is lifted directly from 
        sz_scf_fig4b.do in ~/syzzle/graphs/20190602_oz.

    Inputs:  wealth_sz_scf_rank2.dta and wealth_sz_scf_rank4.dta in $inputs
    Outputs: sz_scf_fig4b_rerank.pdf, sz_scf_fig4b_rerank_taxbond.pdf,
             sz_scf_fig4b_rerank_busw.pdf
*******************************************************************************/


    /***************************************************************************
         Import wealth aggregates in SCF ranked by following household 
            wealth definition:

            hweal_rank2 = taxbond_rawscf + ccorw_rawscf + busw_rawscf  
                        + currency_rawscf + nonmort_rawscf + muni_rawscf

            Then rename all variables so it's clear they're grouped by hweal2, 
            and save as a tempfile.
    ***************************************************************************/

    use "$inputs/wealth_sz_scf_rank2.dta", clear

    #delimit ;
    local sumlist_new = "n hweal_rank1 hweal_rank2 hweal_rank3 hweal_rank4 
                        taxbond_rawscf taxbond_iscf muni_rawscf muni_iscf 
                        currency_rawscf nonmort_rawscf ccorw_rawscf ccorw_iscf 
                        ccorw_0kg_iscf busw_rawscf busw_iscf";
    #delimit cr

    * Tag every aggregate and every *_frac variable as coming from the rank2
    * file. w2_group is deliberately left out of this loop: it becomes the
    * merge key "group" just below. Previously it was suffixed here too, and
    * the later "rename w2_group group" only worked because Stata resolved
    * w2_group as an abbreviation of w2_group_rank2.
    foreach var in `sumlist_new' *_frac {
        rename `var' `var'_rank2
    }

    drop group
    rename w2_group group

    tempfile rank2
    save `rank2'

    /***************************************************************************
         Import wealth aggregates in SCF ranked by following household 
            wealth definition:

            hweal_rank4 = taxbond_iscf + ccorw_0kg_iscf + busw_iscf
                        + currency_rawscf + nonmort_rawscf + muni_iscf

            Then subtract one from year, restructure slightly, and merge to 
            tempfile above
    ***************************************************************************/

    use "$inputs/wealth_sz_scf_rank4.dta", clear
    
    replace year = year - 1
    
    drop group
    
    rename w4_group group

    merge 1:1 year group using `rank2', nogen

    /***************************************************************************
         Make graphs
         Each figure overlays, for groups 2, 3 and 4, the series from the 
         rank2 file (suffix _rank2) and the series from the rank4 file.
    ***************************************************************************/

        /***********************************************************************
             Original graph replicating SZ 2016 Figure IVB
        ***********************************************************************/

    #delimit ;
    twoway (scatter hweal_rank2_frac_rank2 year if group == 2, lcolor("$u1") 
            mcolor("$u1") msymbol(d) c(l)) 
        (scatter hweal_rank3_frac year if group == 2, lcolor("$u3") 
            mcolor("$u3") msymbol(ch) c(l) lpattern(shortdash)) 
        (scatter hweal_rank2_frac_rank2 year if group == 3, lcolor("$u1") 
            mcolor("$u1") msymbol(d) c(l)) 
        (scatter hweal_rank3_frac year if group == 3, lcolor("$u3") 
            mcolor("$u3") msymbol(ch) c(l) lpattern(shortdash)) 
        (scatter hweal_rank2_frac_rank2 year if group == 4, lcolor("$u1") 
            mcolor("$u1") msymbol(d) c(l)) 
        (scatter hweal_rank3_frac year if group == 4, lcolor("$u3") 
            mcolor("$u3") msymbol(ch) c(l) lpattern(shortdash)) 
        , 
    $gpr ytitle("Share of Household Wealth (%)" "(Excluding Housing and Pensions)") 
        xtitle("") 
        legend(label(1 "SCF Top 10%") 
               label(2 "SCF Capitalized Top 10%")
               label(3 "SCF Top 1%")
               label(4 "SCF Capitalized Top 1%")
               label(5 "SCF Top 0.1%")
               label(6 "SCF Capitalized Top 0.1%")
               order(1 2 3 4 5 6) row(3) region(lc(white)))
        ysc(titlegap(2) range(-0.05 1.05))  ylab(0(.2)1)
        xsca(range(1987 2016))
        xlab(1988(3)2015);
    #delimit cr
    graph export sz_scf_fig4b_rerank.pdf, replace

        /***********************************************************************
             Taxable fixed income version
        ***********************************************************************/
    
    #delimit ;
    twoway (scatter taxbond_rawscf_frac_rank2 year if group == 2, lcolor("$u1") 
            mcolor("$u1") msymbol(d) c(l))
        (scatter taxbond_iscf_frac year if group == 2, lcolor("$u3") 
            mcolor("$u3") msymbol(ch) c(l) lpattern(shortdash))
        (scatter taxbond_rawscf_frac_rank2 year if group == 3, lcolor("$u1") 
            mcolor("$u1") msymbol(d) c(l))
        (scatter taxbond_iscf_frac year if group == 3, lcolor("$u3") 
            mcolor("$u3") msymbol(ch) c(l) lpattern(shortdash))
        (scatter taxbond_rawscf_frac_rank2 year if group == 4, lcolor("$u1") 
            mcolor("$u1") msymbol(d) c(l))
        (scatter taxbond_iscf_frac year if group == 4, lcolor("$u3") 
            mcolor("$u3") msymbol(ch) c(l) lpattern(shortdash))
        ,
    $gpr ytitle("Share of Fixed Income Wealth (%)") xtitle("") 
        legend(label(1 "SCF Top 10%") 
               label(2 "SCF Capitalized Top 10%")
               label(3 "SCF Top 1%")
               label(4 "SCF Capitalized Top 1%")
               label(5 "SCF Top 0.1%")
               label(6 "SCF Capitalized Top 0.1%")
               order(1 2 3 4 5 6) row(3) region(lc(white)))
        ysc(titlegap(2) range(-0.05 1.05))  ylab(0(.2)1)
        xsca(range(1987 2016))
        xlab(1988(3)2015);
    #delimit cr
    graph export sz_scf_fig4b_rerank_taxbond.pdf, replace

        /***********************************************************************
             Private business version
        ***********************************************************************/
    
    #delimit ;
    twoway (scatter busw_rawscf_frac_rank2 year if group == 2, lcolor("$u1") 
            mcolor("$u1") msymbol(d) c(l))
        (scatter busw_iscf_frac year if group == 2, lcolor("$u3") 
            mcolor("$u3") msymbol(ch) c(l) lpattern(shortdash))
        (scatter busw_rawscf_frac_rank2 year if group == 3, lcolor("$u1") 
            mcolor("$u1") msymbol(d) c(l))
        (scatter busw_iscf_frac year if group == 3, lcolor("$u3") 
            mcolor("$u3") msymbol(ch) c(l) lpattern(shortdash))
        (scatter busw_rawscf_frac_rank2 year if group == 4, lcolor("$u1") 
            mcolor("$u1") msymbol(d) c(l))
        (scatter busw_iscf_frac year if group == 4, lcolor("$u3") 
            mcolor("$u3") msymbol(ch) c(l) lpattern(shortdash))
        ,
        $gpr ytitle("Share of Non C-corp Business Wealth (%)") xtitle("")
        legend(label(1 "SCF Top 10%") 
               label(2 "SCF Capitalized Top 10%")
               label(3 "SCF Top 1%")
               label(4 "SCF Capitalized Top 1%")
               label(5 "SCF Top 0.1%")
               label(6 "SCF Capitalized Top 0.1%")
               order(1 2 3 4 5 6) row(3) region(lc(white)))
        ysc(titlegap(2) range(-0.05 1.05))  ylab(0(.2)1)
        xsca(range(1987 2016))
        xlab(1988(3)2015);
    #delimit cr
    graph export sz_scf_fig4b_rerank_busw.pdf, replace

end/*%>*/

capture program drop topsharegrowthdecomp/*%<*/
program define topsharegrowthdecomp
/*******************************************************************************
    Appendix Figure 13
	(GD) Top Wealth Shares Growth Decomposition into Portfolio Categories

    Inputs:  wealthcompovertime.csv and topshares_adjusted.csv in the working
             directory
    Outputs: one bar chart per top group (top1, top01, top001), exported as
             <topgrp>sharegrowthdecomp1989_2016.pdf

    Outline: (1) compute the change in each wealth component between the 
    selected years for three series (psz18, preferred, scf); (2) compute the
    same changes for the top shares themselves; (3) assert that the component
    changes sum to the top share change; (4) plot the 1989-2016 changes.
*******************************************************************************/

    /***************************************************************************
         Retrieve components shares of total wealth from figure 14 output;
            calculate component-wise change and save as tempfile. This involves
            a lot of finagling of the shape of the data.
    ***************************************************************************/

    import delimited using wealthcompovertime.csv, clear

    * Only the endpoint years of the comparisons below are needed
    keep if inlist(year, 1978, 1989, 2000, 2001, 2016)
    
    keep year *_tsh_*_top1 *_tsh_*_top01 *_tsh_*_top001
    drop pen_hou_oth* hwequ* hwbus* *_dfa_* *_equal_*  *_supple_* 

    * Strip the "tsh" token from the names and relabel the "equ" series as
    * psz18, so names have the form <component>_<series>_<topgroup>
    rename *_tsh_* *_*

    rename *_equ_* *_psz18_*

    * Long in top group: the trailing _top1 / _top01 / _top001 becomes topgrp
    #delimit ;
    reshape long hwfix_psz18 ccorw_psz18 pthru_psz18 hwhou_psz18 hwpen_psz18 
            hwoth_psz18 hwfix_preferred ccorw_preferred pthru_preferred 
            hwhou_preferred hwpen_preferred hwoth_preferred hwfix_scf ccorw_scf 
            pthru_scf hwhou_scf hwpen_scf hwoth_scf, i(year) j(topgrp, string);
    #delimit cr
   
    * Drop the leading underscore that reshape carried into topgrp
    replace topgrp = subinstr(topgrp, "_", "", 1)

    * Remember the component-by-series variable names before going wide
    qui ds year topgrp, not
    local difflist = "`r(varlist)'"

    * Wide in year: one column per variable and year, one row per top group
    reshape wide *_psz18 *_preferred *_scf, i(topgrp) j(year)

    * Changes to 2016 from each base year. For the scf series only the 1989
    * and 2001 base years are computed.
    foreach diffvar in `difflist' {
        gen `diffvar'2016_1989 = `diffvar'2016 - `diffvar'1989
        assert !missing(`diffvar'2016_1989)
        gen `diffvar'2016_2001 = `diffvar'2016 - `diffvar'2001
        assert !missing(`diffvar'2016_2001)
        
        if !regexm("`diffvar'", "scf") {
            gen `diffvar'2016_1978 = `diffvar'2016 - `diffvar'1978
            gen `diffvar'2016_2000 = `diffvar'2016 - `diffvar'2000

            assert !missing(`diffvar'2016_1978) & !missing(`diffvar'2016_2000)
        }
        drop `diffvar'2016 `diffvar'2001 `diffvar'2000 `diffvar'1989 `diffvar'1978 
    }

    * Move the component name to the end of each variable name so the next
    * reshape can turn it into the wlthcat identifier
    #delimit ;
    rename (hwfix_* ccorw_* pthru_* hwhou_* hwpen_* hwoth_*)
        (*_hwfix *_ccorw *_pthru *_hwhou *_hwpen *_hwoth);

    reshape long preferred2016_2000 psz182016_2000 preferred2016_1989 psz182016_1989 
        scf2016_1989 preferred2016_2001 psz182016_2001 
        scf2016_2001 preferred2016_1978 psz182016_1978, i(topgrp) j(wlthcat, string);
    #delimit cr

    * Remove underscores from wlthcat (e.g. the leading one left by reshape)
    replace wlthcat = subinstr(wlthcat, "_", "", .)

    tempfile components
    save `components'

    /***************************************************************************
         Retrieve preferred and SCF top wealth shares from figure 1 
            output; calculate top shares change between years. Also involves
            a lot of finagling.
    ***************************************************************************/

    import delimited using topshares_adjusted.csv, clear

    keep if inlist(year, 1978, 1989, 2000, 2001, 2016)

    keep year top1_* top01_* top001_*
    drop *_pszes* *_equrtrns *_scfpref

    * Use the same series names (preferred, psz18, scf) as the components file
    rename (*_preferred *_equrtrns_v3 *_scfpref_forbes) (preferred_* psz18_* scf_*)

    reshape long preferred psz18 scf, i(year) j(topgrp, string)
    replace topgrp = subinstr(topgrp, "_", "", 1)

    reshape wide preferred psz18 scf, i(topgrp) j(year)

    * Same differences as for the components above
    foreach diffvar in preferred psz18 scf {
        gen `diffvar'2016_1989 = `diffvar'2016 - `diffvar'1989
        assert !missing(`diffvar'2016_1989)
        gen `diffvar'2016_2001 = `diffvar'2016 - `diffvar'2001
        assert !missing(`diffvar'2016_2001)
        
        if !regexm("`diffvar'", "scf") {
            gen `diffvar'2016_1978 = `diffvar'2016 - `diffvar'1978
            gen `diffvar'2016_2000 = `diffvar'2016 - `diffvar'2000

            assert !missing(`diffvar'2016_1978) & !missing(`diffvar'2016_2000)
        }
        drop `diffvar'2016 `diffvar'2000 `diffvar'2001 `diffvar'1989 `diffvar'1978
    }

    * These rows are the totals, labelled "hweal"
    gen wlthcat = "hweal"

    /***************************************************************************
        Append component-wise shares and save as tempfile
    ***************************************************************************/

    append using `components'

    isid topgrp wlthcat

    tempfile appended
    save `appended'

    /***************************************************************************
        Ensure component-wise changes add up to change in top shares of 
            aggregate wealth
    ***************************************************************************/

    * Sum all component rows into one "componentwise" row per top group and
    * put it next to the "hweal" total
    replace wlthcat = "componentwise" if wlthcat != "hweal"

    collapse (sum) preferred* psz18* scf*, by(wlthcat topgrp)

    replace wlthcat = "_" + wlthcat

    reshape wide preferred* psz18* scf*, i(topgrp) j(wlthcat, string)

    * Sum of components and total must agree within 0.1
    foreach column in preferred2016_2000 psz182016_2000 preferred2016_1989 ///
        psz182016_1989 scf2016_1989 preferred2016_2001 ///
        psz182016_2001 scf2016_2001 preferred2016_1978 psz182016_1978 {
        di "`column'"
        assert abs(`column'_componentwise - `column'_hweal) < .1
    }

    /***************************************************************************
        Make plots (this requires a bit of finagling)
    ***************************************************************************/

    * The check above collapsed the data, so reload the appended file
    use `appended', clear

    format psz18* preferred* scf* %9.1fc

    * Three copies of each row, one per bar: copy 1 keeps only psz18, copy 2
    * only preferred, copy 3 only scf
    expand 3
    sort wlthcat topgrp

    foreach timerange in 2016_1978 2016_2001 2016_1989 2016_2000 {
        by wlthcat topgrp: replace psz18`timerange' = . if _n != 1
        by wlthcat topgrp: replace preferred`timerange' = . if _n != 2
        
        if "`timerange'" == "2016_1989" | "`timerange'" == "2016_2001"  {
            by wlthcat topgrp: replace scf`timerange' = . if _n != 3
        }
    }

    * x-axis position of each category; anything else (the hweal total) is 7
    #delimit ;
    gen catorder = cond(wlthcat == "hwfix", 1,
                   cond(wlthcat == "ccorw", 2,
                   cond(wlthcat == "pthru", 3,
                   cond(wlthcat == "hwhou", 4,
                   cond(wlthcat == "hwpen", 5,
                   cond(wlthcat == "hwoth", 6, 7))))));
    #delimit cr

    * Offsets that place the bars side by side around the category position.
    * NOTE(review): only barorder2016_1989 is used by the plotting loop in 
    * this program; barorder2016_2001 and barorder_other look like leftovers 
    * from figures that were removed - confirm before deleting.
    by wlthcat topgrp: gen barorder2016_1989 = catorder + ((_n - 2) * 0.25)
    by wlthcat topgrp: gen barorder2016_2001 = catorder + ((_n - 2) * 0.25)
    by wlthcat topgrp: gen barorder_other = catorder + ((_n - 1.5) * 0.4)

    * String copies of the values (using the display format) for bar labels
    foreach makelab of varlist psz18* scf* preferred* {
        tostring `makelab', gen(`makelab'lab) usedisplayformat force
    }

    * Missing values were converted to "."; blank them so no label is drawn
    foreach labvar of varlist *lab {
        replace `labvar' = "" if `labvar' == "."
    }

    * One figure per top group; invisible scatter markers carry the labels
    foreach topgrp in top1 top01 top001 {
        
        local grplab = cond("`topgrp'" == "top1", "top 1", ///
                       cond("`topgrp'" == "top01", "top 0.1", "top 0.01"))

        #delimit ;
        graph twoway (bar psz182016_1989 preferred2016_1989 scf2016_1989 barorder2016_1989
                if topgrp == "`topgrp'", barw(0.25 0.25 0.25) bcolor("$u3" "$u1" "$p2"))
            (scatter psz182016_1989 preferred2016_1989 scf2016_1989 barorder2016_1989 
                    if topgrp == "`topgrp'",
                ms(none none none) 
                mlab(psz182016_1989lab preferred2016_1989lab scf2016_1989lab)
                mlabcolor(black black black) mlabpos(12 12 12))
            ,
            $gpr
            ytitle("Chg in `grplab'% wealth sh by component, 1989-2016")
            xtitle("")
            ylab(, format(%9.0fc))
            xlab(1 "Fixed Income" 2 "C-corporations" 3 "Pass-through Business" 
                 4 "Housing" 5 "Pensions" 6 "Other" 7 "Total", valuelabel notick)
            legend(order(1 "Equal Returns" 2 "Baseline" 3 "Harmonized SCF w/Forbes") 
                region(lcolor(white) margin(tiny)) row(1) size(medlarge))
            xsize(10);   
        #delimit cr
        graph export `topgrp'sharegrowthdecomp1989_2016.pdf, replace
    }

end/*%>*/

capture program drop public_corp_share/*%<*/
program define public_corp_share
/*******************************************************************************
    Appendix Figure 36
    (PC) Public Company Share of Corporate Activity

    Takes no arguments. Loops over two corporate samples, reads the matching
    profits/dividends CSV from $inputs, computes the public-company share of
    each activity measure by year, and exports one line chart per sample:
        pubpvt_c -> public_share_ccorps.pdf    (C-corporations, 5 series)
        pubpvt   -> public_share_allcorps.pdf  (all corporations, 4 series)
    Colors and common graph options come from the globals $u1 $u3 $u5 $p2 $f1
    and $gpr, which must be defined by the caller.
*******************************************************************************/

    foreach corpgrp in "pubpvt_c" "pubpvt" {

        /***********************************************************************
            Everything that differs between the two samples is collected here:
            input file stem, the extra dividends series (C-corps only), the
            y-axis title, the legend contents/columns, and the output file.
        ***********************************************************************/

        local is_ccorp = ("`corpgrp'" == "pubpvt_c")

        if `is_ccorp' {
            local filename "pubvpri_ccorps"
            local divplot (connect dividends_share year, msym(o) lc("$u5") mc("$u5"))
            local ytext "Public Company Share of C-corporation Activity (%)"
            local legorder 1 "Profits" 2 "Sales" 3 "Capital" 4 "Number of Firms" 5 "Dividends"
            local legcols 3
            local outfile "public_share_ccorps.pdf"
        }
        else {
            local filename "pubvpri"
            local divplot ""
            local ytext "Public Company Share of Corporate Activity (%)"
            local legorder 1 "Profits" 2 "Sales" 3 "Capital" 4 "Number of Firms"
            local legcols 2
            local outfile "public_share_allcorps.pdf"
        }

        /***********************************************************************
            Load dividends, capital, sales, profits, and firm counts for this
            sample. Years before 1994 are dropped b/c of faulty weights in 1993.
        ***********************************************************************/

        * Created by STS/xpez_20170825
        insheet using "$inputs/profitsdivs_`filename'_20170825.csv", clear

        if `is_ccorp' {
            * The C-corp extract needs extra cleaning: rows 49 onward and
            * columns v8-v12 are discarded and the data columns are converted
            * to numeric (presumably trailing non-data content in the CSV --
            * TODO confirm against the raw file).
            drop if _n >= 49
            drop v8-v12
            destring public_co-count, replace
        }

        keep if year > 1993

        /***********************************************************************
            Yearly totals across all rows first, then restrict to public
            companies and express each measure as a percent of its total.
        ***********************************************************************/

        sort year
        foreach measure of varlist dividends capital sale profits count {
            by year: egen total_`measure' = total(`measure')
        }

        keep if public_co == 1

        foreach measure of varlist dividends capital sale profits count {
            gen `measure'_share = (`measure' / total_`measure') * 100
        }

        /***********************************************************************
            One chart per sample; `divplot' is empty for the all-corps sample
        ***********************************************************************/

        #delimit ;
        twoway (connect profits_share year, msym(s) lc("$u1") mc("$u1"))
            (connect sale_share year, msym(t) lc("$u3") mc("$u3"))
            (connect capital_share year, msym(d) lc("$p2") mc("$p2"))
            (connect count_share year, msym(x) lc("$f1") mc("$f1"))
            `divplot'
            ,
            $gpr
            ytitle("`ytext'") xtitle("")
            xlab(1995(5)2015)
            yscale(range(0 100))
            ylab(0(20)100)
            legend(order(`legorder')
                region(lcolor(white) margin(tiny)) col(`legcols'));
        #delimit cr
        graph export `outfile', replace
    }
end/*%>*/

capture program drop carried_interest/*%<*/
program define carried_interest 
/*******************************************************************************
    Appendix Figure 26
    (ICI) Identifying Carried Interest Compensation among Realized Capital Gains

    Takes no arguments. Combines three inputs from $inputs by year -- the SOCA
    aggregates, the yearly K-1 capital-gains collapse, and the SOI income flows
    by group -- and exports four charts:
        soi_sample_v_soca.pdf
        soca_vs_cdw_efile_capgains.pdf
        efile_gpshares.pdf
        soi_vs_gp_capgains.pdf
    Colors and common graph options come from globals ($u1 $u3 $u5 $p2 $f1 $f3
    $gpr) that must be defined by the caller.
*******************************************************************************/

    /***************************************************************************
        Load Sales of Capital Assets (SOCA) data file from Capitalists 
            appendix and collapse to yield aggregates by year, then divide by
            1E6.
        NOTE(review): an earlier version of this comment said "scale down by
            1K". The series is plotted on a "Billions" axis next to fikgi/1E9,
            so the file is presumably in thousands of dollars -- confirm.
        The total is referred to by one name (net_total) throughout, so the
            program does not depend on variable-name abbreviation.
    ***************************************************************************/

    import delimited using "$inputs/asset_comp_by_year.csv", clear

    collapse (sum) net_total net_pthrough, by(year)

    replace net_total = net_total / 1E6
    replace net_pthrough = net_pthrough / 1E6

    tempfile soca
    save `soca'

    /***************************************************************************
        Load data collapse created by Sam Wallach-Hanson and
            top-censor at 5 trillion: values above 5E12 are set to missing
            (already-missing values also satisfy the condition and stay
            missing). Then compute general-partner shares (in percent) of
            capital gains and ordinary income, for all and individual K-1s.
    ***************************************************************************/

    use "$inputs/yearly_k1cg_collapse_20190610.dta", clear

    foreach topcensor of varlist cdw_tot_cg cdw_ind_cg cdw_tot_cg_nonegs ///
        cdw_ind_cg_nonegs {

        replace `topcensor' = . if `topcensor' > 5E12
    }

    gen gp_share_tot = 100 * efile_gp_cg / efile_tot_cg
    gen gp_share_ind = 100 * efile_ind_gp_cg / efile_ind_cg

    gen gp_share_ordtot = 100 * efile_gp_ordinc / efile_tot_ordinc
    gen gp_share_ordind = 100 * efile_ind_gp_ordinc / efile_ind_ordinc

    tempfile cdw
    save `cdw'

    /***************************************************************************
        Load income flows by group from Steph Kestelman's collapse of 
            SOI aggregates, and merge the two yearly files onto it.
        The SOCA merge keeps only master years (keep(1 3)). The K-1 merge has
            no keep(), so years found only in the K-1 collapse are retained;
            the "| year > 2014" conditions in the plots below rely on this
            (presumably those rows have no group value -- confirm).
    ***************************************************************************/

    use "$inputs/fig3_flows_1966_2014.dta", clear

    isid year group

    merge m:1 year using `soca', keepusing(net_total net_pthrough) keep(1 3) nogen
    merge m:1 year using `cdw', keepusing(gp_share_* cdw_tot_cg efile_gp_cg efile_tot_cg) nogen

    * Divide by 1E9 so these series match the "Billions" axis titles below
    foreach unscaled of varlist fikgi cdw_tot_cg efile_*_cg {
        replace `unscaled' = `unscaled' / 1E9
    }

    /***************************************************************************
         Make plots!
    ***************************************************************************/

    #delimit ;
    /* (1) SOCA total vs. SOI sample capital gains, all years */
    twoway  (bar net_total year, barw(.5) col("$u1")) 
        (connect fikgi year, ms(o) lc("$u3") mc("$u3"))
        if group == "All"
        ,
        ytitle("Aggregate Net Capital Gains (Billions)") xtitle("")
        $gpr xsize(7) xlab(1965(5)2015)
        legend(order(2 "SOI Sample" 1 "SOI SOCA Study") 
            region(lc(white) margin(tiny)));
    graph export "soi_sample_v_soca.pdf", replace;

    /* (2) SOCA pass-through gains vs. SOI sample and K-1 totals, after 1995 */
    twoway (bar net_pthrough year, barw(.5) col("$u1")) 
        (connect fikgi year, ms(o) lc("$u3") mc("$u3"))
        (connect cdw_tot_cg year, ms(t) lc("$p2") mc("$p2"))
        (connect efile_tot_cg year, ms(s) lc("$u5") mc("$u5"))
            if (group == "All" & year > 1995) | year > 2014
        ,
        $gpr 
        ytitle("Aggregate Net Capital Gains (Billions)") xtitle("")
        xlab(1995(5)2015) 
        ysca(range(-200 800)) ylab(-200(200)800)
        legend(order(1 "SOI SOCA Net Pass-through" 2 "SOI Sample All Capital Gains" 
                     3 "Population Total 1065 K-1 Gains" 4 "E-file Total 1065 K-1 Gains") 
            col(2) region(lc(white) margin(tiny)))
        xsize(7);
    graph export "soca_vs_cdw_efile_capgains.pdf", replace;

    /* (3) General-partner shares of capital gains and ordinary income */
    twoway (connect gp_share_tot year, ms(o) lc("$u1") mc("$u1")) 
        (connect gp_share_ind year, ms(s) lc("$f1") mc("$f1"))
        (connect gp_share_ordtot year, ms(d) lc("$u3") mc("$u3")) 
        (connect gp_share_ordind year, ms(t) lc("$f3") mc("$f3"))
        if (group == "All" & year > 2009) | year > 2014
        ,
        $gpr
        ytitle("General Partner Share of Income (%)") xtitle("")
        xlab(2009(1)2017)
        ysca(range(0 100)) ylab(0(20)100)
        legend(order(1 "Cap Gains (All E-file 1065 K-1s)"
                     2 "Cap Gains (Indl E-file 1065 K-1s)"
                     3 "Ord Inc (All E-file 1065 K-1s)" 
                     4 "Ord Inc (Indl E-file 1065 K-1s)") 
            col(2) region(lc(white) margin(tiny)))
        xsize(7) ;
    graph export "efile_gpshares.pdf", replace;

    /* (4) SOI sample gains by group vs. e-file general-partner K-1 gains;
           the trailing "if year > 2009" applies to every series */
    twoway (connect fikgi year if group == "All", ms(o) lc("$u1") mc("$u1"))
        (connect fikgi year if group == "P99-100", ms(s) lc("$u3") mc("$u3"))
        (connect fikgi year if group == "P99.9-100", ms(t) lc("$p2") mc("$p2"))
        (connect efile_gp_cg year if group == "All" | year > 2014, ms(d) lc("$u5") mc("$u5"))
            if year > 2009
        ,
        $gpr 
        ytitle("Aggregate Net Capital Gains (Billions)") xtitle("")
        xlab(2009(1)2017) 
        ysca(range(0 600)) ylab(0(100)600)
        legend(order(1 "SOI Sample All Capital Gains" 2 "SOI Sample Top 1% Capital Gains" 
                     3 "SOI Sample Top 0.1% Capital Gains" 
                     4 "E-file General Partner 1065 K-1 Gains") 
            col(2) region(lc(white) margin(tiny)))
        xsize(7) ;
    graph export "soi_vs_gp_capgains.pdf", replace;
    #delimit cr
end/*%>*/

capture program drop persistence/*%<*/
program define persistence
/*******************************************************************************
    Appendix Figure 27
    (PI) Persistence of Realized Capital Gains and Other Income Flows

    Takes no arguments. For the top 1% and the top 10%, stacks the one-year
    persistence files for five flows found under $inputs/combined, then draws
    four charts per group. Each chart plots one comparison flow against
    realized capital gains plus a 45-degree line and is exported as
        persistence_equity_t<grp>.pdf   (dividends)
        persistence_agi_t<grp>.pdf
        persistence_int_t<grp>.pdf
        persistence_wage_t<grp>.pdf
    where <grp> is 1 or 10. Colors and shared graph options come from the
    globals $u1, $u3 and $gpr.
*******************************************************************************/

    foreach topgrp in 1 10 {

        local percentile = 100 - `topgrp'
        local srcdir "$inputs/combined"

        /***********************************************************************
            Stack the per-flow files. The first file (a0) is tagged "divs";
            each appended file arrives with an empty flow tag, which is then
            filled in: cg -> "kgs", adj -> "agi", int and wage keep their name.
        ***********************************************************************/

        use "`srcdir'/a0_t`percentile'_merged_1yr_persistence", clear        
        gen flow = "divs"

        foreach src in cg adj int wage {
            append using "`srcdir'/`src'_t`percentile'_merged_1yr_persistence"

            if "`src'" == "cg" {
                local tag "kgs"
            }
            else if "`src'" == "adj" {
                local tag "agi"
            }
            else {
                local tag "`src'"
            }
            replace flow = "`tag'" if flow == ""
        }

        keep year_one meann weighted_mean flow
        gen group = "P`percentile'-100"

        * Both axes are multiplied by 100 to match the percentile tick labels
        replace weighted_mean = 100 * weighted_mean
        replace year_one = 100 * year_one

        /***********************************************************************
            Axis ticks and x-range depend on which top group is plotted
        ***********************************************************************/

        if `topgrp' == 1 {
            local xticks "99(.1)99.9"
            local yticks "75(5)100"
            local xspan "98.95 99.95"
        }
        else {
            local xticks "90(1)99"
            local yticks "60(5)100"
            local xspan "89.5 99.5"
        }

        /***********************************************************************
            One chart per comparison flow, dividends first. Only the legend
            label and the output-file stub vary across charts.
        ***********************************************************************/

        foreach comparison in divs agi int wage {

            if "`comparison'" == "divs" {
                local complab "Dividends"
                local stub "equity"
            }
            else if "`comparison'" == "agi" {
                local complab "Adjusted Gross Income"
                local stub "agi"
            }
            else if "`comparison'" == "int" {
                local complab "Taxable Interest Income"
                local stub "int"
            }
            else {
                local complab "Wage Income"
                local stub "wage"
            }

            #delimit ;
            twoway 
                (connect weighted_mean year_one if flow == "`comparison'", 
                    ms(o) lc("$u1") mc("$u1"))
                (connect weighted_mean year_one if flow == "kgs", 
                    ms(s) lc("$u3") mc("$u3"))
                (line year_one year_one, lc(gray) lp(1)),
                ytitle("Component Percentile in Subsequent Year") 
                xtitle("Component Percentile in Current Year") 
                xlab(`xticks') xsca(range(`xspan')) 
                ylab(`yticks') ysca(range(72 103)) 
                legend(order(1 "`complab'" 2 "Realized Capital Gains" 3 "45-Degree Line") 
                    row(1) region(lc(white)))
                xsize(7.5) $gpr;
            #delimit cr
            graph export "persistence_`stub'_t`topgrp'.pdf", replace
        }
    }
end/*%>*/

capture program drop wealthconcentration_tu/*%<*/
program define wealthconcentration_tu 
/*******************************************************************************
    Appendix Figure 14
    (TU) Wealth concentration at the tax unit level (TU version of figure 11, as
        of 2021-05-19)

    Takes no arguments. Builds wealth shares (in percent) of three groups --
    bottom 90%, P90-99, top 1% -- from three sources: the preferred
    capitalization, the equal-returns capitalization, and the DFA net-worth
    file. Merges them by year and exports wealthconcentration3groups_tu.pdf.
    Relies on the helper programs load_preferred_taxdata and load_taxdata, the
    user-written command colorpalette, and globals for specification names,
    colors and graph options defined elsewhere.
*******************************************************************************/

    /***************************************************************************
         Load wealth shares under capitalized specification and keep 
            groups we want; calculate wealth shares of each group; then save as
            tempfiles (one per specification, named after it).
    ***************************************************************************/

    foreach capitalized in preferred equreturns {

        if "`capitalized'" == "preferred" {
            load_preferred_taxdata, units("tu") earlyspec($preferred_defn_early) ///
                midspec($preferred_defn_mid) latespec($preferred_defn_late) 
        }
        else {
            * Equal returns: load the three periods separately (each with its
            * own ranking spec), stack them, and pick the wealth variable that
            * belongs to each year's period.
            tempfile part
            load_taxdata, units("tu") rankspec($sz_wlth_defn_early_v3) startyr(1966) endyr(1979)
            save `part'
            load_taxdata, units("tu") rankspec($sz_wlth_defn_mid_v3) startyr(1980) endyr(2000)
            append using `part'
            save `part', replace
            load_taxdata, units("tu") rankspec($sz_wlth_defn_late_v3) startyr(2001) endyr(2016)
            append using `part'

            gen hweal_equreturns = cond(year < 1980, hweal$sz_wlth_defn_early_v3, ///
                                cond(year >= 1980 & year < 2001, hweal$sz_wlth_defn_mid_v3, ///
                                    hweal$sz_wlth_defn_late_v3))
        }

        keep if inlist(group, "P0-90", "P90-99", "P99-100")

        isid group year

        * Recode groups to suffixes usable in variable names after the reshape;
        * "alpaca" is a sentinel that the assert guarantees is never assigned.
        #delimit ;
        replace group = cond(group == "P0-90", "_bot90", 
                        cond(group == "P90-99", "_p90_99", 
                        cond(group == "P99-100", "_top1", "alpaca")));
        #delimit cr
        assert group != "alpaca"

        * The three kept groups are summed to form each year's denominator
        sort year
        by year: egen tthweal_`capitalized' = total(hweal_`capitalized')

        gen wlthshare_`capitalized' = (hweal_`capitalized' / tthweal_`capitalized') * 100

        keep year group wlthshare_`capitalized'

        reshape wide wlthshare_`capitalized', i(year) j(group, string)

        tempfile `capitalized'
        save ``capitalized''
    }

    /***************************************************************************
        Load DFA data and calculate wealth shares
    ***************************************************************************/
        
    import delim "$inputs/dfa-networth-levels-detail.csv", clear

    gen year = real(substr(date, 1, 4)) // Date has quarters
    assert !missing(year) & inrange(year, 1989, 2020)

    * Map DFA categories to the same three group suffixes; Bottom50 and Next40
    * both map to the bottom 90%.
    #delimit ;
    gen group = cond(inlist(category, "Bottom50", "Next40"), "_bot90", 
                cond(category == "Next9", "_p90_99", 
                cond(category == "Top1", "_top1", "alpaca")));
    #delimit cr
    assert group != "alpaca"    

    * Sum bottom two groups
    collapse (sum) networth, by(year date group)

    * Get annual averages from quarterly data
    collapse (mean) networth, by(year group)

    sort year
    by year: egen ttnetworth = total(networth)

    gen wlthshare_dfa = (networth / ttnetworth) * 100

    keep year group wlthshare_dfa

    reshape wide wlthshare_dfa, i(year) j(group, string)

    tempfile dfa
    save `dfa'

    /***************************************************************************
        Merge capitalized data together with DFA data. The asserts pin down
            the expected coverage: DFA-only years are after 2016, capitalized-
            only years are before 1989, and every year in the overlap matches.
    ***************************************************************************/
    
    merge 1:1 year using `preferred'
    assert year > 2016 if _merge == 1
    assert year < 1989 if _merge == 2
    assert _merge == 3 if inrange(year, 1989, 2016)
    drop _merge 

    merge 1:1 year using `equreturns', assert(1 3)
    assert year > 2016 if _merge == 1
    assert _merge == 3 if inrange(year, 1966, 2016)
    drop _merge

    /***************************************************************************
        Plot series
    ***************************************************************************/

    * Lighter variants of three base colors for the equal-returns series: the
    * 10th entry of each intensity ramp is used. nograph keeps colorpalette
    * from rendering a palette graph, since only its r() results are needed.
    colorpalette "$u3", intensity(0.1(.05)1) nograph
    local u3l "`r(p10)'"
    colorpalette "$u1", intensity(0.1(.05)1) nograph
    local u1l "`r(p10)'"
    colorpalette "$p2", intensity(0.1(.05)1) nograph
    local p2l "`r(p10)'"

    sort year
    #delimit ;
    graph twoway (connected wlthshare_preferred_top1 year, ms(O) color("$u1") lw(thin))
        (connected wlthshare_equreturns_top1 year, ms(o) lc("`u1l'") mc("`u1l'") lw(thin))
        (connected wlthshare_dfa_top1 year, ms(O) color("$f1") lw(medthin) lp("-"))
        (connected wlthshare_preferred_p90_99 year, ms(D) color("$u3") lw(thin))
        (connected wlthshare_equreturns_p90_99 year, ms(dh) lc("`u3l'") mc("`u3l'") lw(thin))
        (connected wlthshare_dfa_p90_99 year, ms(D) color("$f3") lw(medthin) lp("-"))
        (connected wlthshare_preferred_bot90 year, ms(T) color("$p2") lw(thin))
        (connected wlthshare_equreturns_bot90 year, ms(t) lc("`p2l'") mc("`p2l'") lw(thin))
        (connected wlthshare_dfa_bot90 year, ms(T) lc("$u4") mc("$u4") lw(medthin) lp("-"))
        ,
        $gpr
        ytitle("Wealth Share (%)") xtitle("")
        xlab(1965(5)2020) xscale(range(1965 2020))
        legend(order(1 "Top 1% Baseline" 2 "Top 1% Equal Returns" 3 "Top 1% DFA"
                     4 "P90-99 Baseline" 5 "P90-99 Equal Returns" 6 "P90-99 DFA"
                     7 "Bottom 90% Baseline" 8 "Bottom 90% Equal Returns" 9 "Bottom 90% DFA")
            region(lcolor(white) margin(tiny)) rows(3))
        xsize(7.5);
    #delimit cr
    graph export wealthconcentration3groups_tu.pdf, replace

end/*%>*/

capture program drop social_security_aggs/*%<*/
program define social_security_aggs
/*******************************************************************************
    Appendix Figure 29
    (SS) Social Security and Aggregate Wealth

    Takes no arguments. Combines aggregate baseline wealth with two aggregate
    Social Security wealth (SSW) series, expresses everything as a percent of
    national income, and exports social_security_aggs.pdf. Relies on the helper
    program load_preferred_taxdata, on natinc.dta in the working directory,
    and on globals for paths, specification names, colors and graph options.
*******************************************************************************/


    /***************************************************************************
        Load in collapses of Social Security wealth (in trillions) from 
            Catherine, Miller, and Sarin; convert to dollars; save as tempfile
    ***************************************************************************/

    insheet using "$inputs/sarin_20200417.csv", comma clear

    keep year total_ss
    rename total_ss total_ssw_csm

    replace total_ssw_csm = total_ssw_csm * 1E12

    tempfile catherinesarinmiller
    save `catherinesarinmiller'

    /***************************************************************************
        Load in SSW from Sabelhaus and Henriques Volz and sum exp_ssw across
            wealth groups to get one aggregate per year. The path is quoted,
            like the other $inputs paths, so it survives spaces in $inputs.
    ***************************************************************************/

    import excel using "$inputs/ssw_szz.xls", ///
        sheet("nwdbsort") cellrange(A1:C55) firstrow clear

    isid year wealth_grpy

    collapse (sum) total_ssw_shv = exp_ssw, by(year)

    tempfile sabelhaushenriquesvolz
    save `sabelhaushenriquesvolz'

    /***************************************************************************
        Load in tax data via program; keep only the "All" group and its
            baseline wealth total.
    ***************************************************************************/

    load_preferred_taxdata, earlyspec($preferred_defn_early) ///
        midspec($preferred_defn_mid) latespec($preferred_defn_late)

    keep if group == "All"
    keep year hweal_preferred

    rename hweal_preferred total_wealth

    /***************************************************************************
        Merge on Social Security series and national income series.
            Only years matched to the Catherine-Miller-Sarin series and to
            national income are kept; years without a Sabelhaus-Henriques Volz
            value stay in the data with that series missing (keep(1 3)).
    ***************************************************************************/

    merge 1:1 year using `catherinesarinmiller', assert(1 3) keep(3) nogen
    merge 1:1 year using `sabelhaushenriquesvolz', keep(1 3) nogen

    merge 1:1 year using natinc.dta, assert(2 3) keep(3) keepusing(Y) nogen

    /***************************************************************************
       Make broad aggregate wealth measures (baseline wealth plus each
            Social Security wealth series)
    ***************************************************************************/

    gen total_wealth_incl_ssw_csm = total_wealth + total_ssw_csm
    gen total_wealth_incl_ssw_shv = total_wealth + total_ssw_shv

    /***************************************************************************
        Normalize totals by national income (Y); results are in percent and
            stored in new variables with an _ni suffix
    ***************************************************************************/

    foreach scalebyni of varlist total_wealth* total_ssw_* {
        gen `scalebyni'_ni = (`scalebyni' / Y) * 100
    }

    /***************************************************************************
        Make graphs
    ***************************************************************************/

    #delimit ;
    twoway (connect total_wealth_incl_ssw_csm_ni year, color("$p2") ms(d)) 
           (connect total_wealth_incl_ssw_shv_ni year, color("$p2") ms(dh) lp(-)) 
           (connect total_wealth_ni year, ms(O) color("$u1") lw(medthick))
           (connect total_ssw_csm_ni year, ms(s) color("$u3"))
           (connect total_ssw_shv_ni year, ms(sh) color("$u3") lp(-))
        ,
        $gpr
        ytitle("Aggregate Relative to National Income (%)") 
        xtitle(" ")
        xlab(1989(3)2016, labsize(small)) xscale(range(1988 2017))
        ylab(50(150)800) yscale(range(0 810))
        legend(order(3 "Baseline Wealth" 
                     4 "Social Security Wealth (Catherine Miller Sarin 2020)"
                     5 "Social Security Wealth (Sabelhaus Henriques Volz 2020)"
                     1 "Baseline Wealth + SSW (Catherine Miller Sarin 2020)" 
                     2 "Baseline Wealth + SSW (Sabelhaus Henriques Volz 2020)") 
               region(lcolor(white) margin(tiny)) row(5))
        xsize(6.7);
    #delimit cr
    graph export "social_security_aggs.pdf", replace  

end/*%>*/

capture program drop topportfoliototals/*%<*/
program define topportfoliototals
/*******************************************************************************
    Appendix Figure 10
	(PT) Portfolio totals at the top of the wealth distribution
*******************************************************************************/

    /***************************************************************************
        Top 0.1% and 0.001% Wealth Group Levels by Component
    ***************************************************************************/
    
    import delimited using figure13xpose.csv, clear

    rename _varname wlthcat

    drop if inlist(wlthcat, "hweal", "hwpen", "hwhou", "hwoth")
    drop dfa *top1*

    * Scale into trillions so easier to read    
    foreach dollars of varlist preferred* equreturns* scf* {
        replace `dollars' = `dollars' / 1E12
    }

    #delimit ;
    gen catorder1 = cond(wlthcat == "hwfix", 1,
                    cond(wlthcat == "hwequ", 2,
                    cond(wlthcat == "hwbus", 3, 4)));

    gen catorder2 = cond(wlthcat == "hwfix", 1,
                    cond(wlthcat == "ccorw", 2,
                    cond(wlthcat == "pthru", 3, 4)));
    #delimit cr

    label define catorder1 1 "Fixed Income" 2 "Public Equity" 3 "Private Business" 4 "Pensions, Housing, and Other"
    label define catorder2 1 "Fixed Income" 2 "C-corporation Equity" 3 "Pass-throughs" 4 "Pensions, Housing, and Other"
    label values catorder1 catorder1
    label values catorder2 catorder2

    rename (*top01 *top001 *top0001 scftop*plusforbes) (top01_* top001_* top0001_* top*plusforbes_scf) 

    reshape long top01 top001 top0001 top01plusforbes top001plusforbes top0001plusforbes, ///
        i(wlthcat catorder?) j(datasource, string)
    replace datasource = subinstr(datasource, "_", "", 1)

    isid wlthcat datasource

    foreach grp in top01 top001 {

        gen `grp'_equreturns = `grp' if datasource == "equreturns"
        gen `grp'_preferred = `grp' if datasource == "preferred"
        gen `grp'_scf = `grp' if datasource == "scf"

        gen `grp'_forbes = `grp'plusforbes - `grp'_scf if datasource == "scf"

        #delimit ;
        graph bar `grp'_equreturns `grp'_preferred `grp'_scf `grp'_forbes
            , 
            $gpr
            over(datasource, lab(nolab) gap(0.25)) stack 
            over(catorder2, label(labsize(small)))
            blabel(bar, position(center) color(white) format(%9.1fc) size(vsmall))
            bar(1, color("$u3"))
            bar(2, color("$u1"))
            bar(3, color("$p2"))
            bar(4, color("$p2") lcolor("$u4") lw(medthick))
            ytitle("Wealth (Trillions)")
            legend(order(1 "Equal" 2 "Baseline" 3 "SCF" 4 "Forbes")
                region(lcolor(white) margin(tiny)) row(1)) xsize(6);
        #delimit cr
        graph export `grp'portfoliototals_ccorw_pthru.pdf, replace
        graph display, xsize(7.5)
        graph export `topgrp'portfoliototals_ccorw_pthru_slides.pdf, replace 
    }

    /***************************************************************************
         Top 10% and 1% Wealth Group Levels by Component
    ***************************************************************************/
    
        /***********************************************************************
            Retrieve DFA totals for top 1% TU
        ***********************************************************************/

    use $inputs/dfa_revision.dta, clear

    keep if year == 2016 & inlist(category, "Top1", "Next9")

    *gen ccorw_dfa = pubccorp_dfa + privccorp_dfa + equmutf_dfa
    gen pen_hou_oth = hwpen_dfa + hwhou_dfa + hwoth_dfa + othdebt_dfa

    keep year category hwfix_dfa hwbus_dfa pthru_dfa hwequ_dfa ccorw_dfa pen_hou_oth 
    rename *_dfa * 

    gen group = cond(category == "Top1", 1, 9)
    drop category year

    order group

    xpose, clear varname
    assert _varname[1] == "group" & v1[1] == 9 & v2[1] == 1
    rename (v1 v2 _varname) (next9 top1tu wlthcat) 
    drop in 1

    gen top10tu = top1tu + next9
    drop next9

    replace top10tu = top10tu * 1E6 // Scale out of millions into dollars
    replace top1tu = top1tu * 1E6

    gen datasource = "dfa"

    tempfile dfa
    save `dfa'

        /***********************************************************************
            Prepare preferred capitalized totals for TUs and equal 
                split units
        ***********************************************************************/

    foreach units in tu es {
        foreach wlthgrp in top10 top1 {

            load_taxdata, units("`units'") rankspec($preferred_defn_late) year(2016)

            #delimit ;
            local w_group = cond("`wlthgrp'" == "top10", 2, 
                            cond("`wlthgrp'" == "top1", 3, 
                            cond("`wlthgrp'" == "top01", 4, 5)));
            #delimit cr

            keep if w${preferred_defn_late}_group == `w_group' 

            *rename ccorw_9010 ccorw_pref

            assert _N == 1

            assert sign(nonmort) == -1

            gen pen_hou_oth = hwpen_base + hwhou_base + nonmort + miscw_hweal$preferred_defn_late
            drop hwpen_base hwhou_base nonmort

            egen hweal_check1 = rowtotal(hwfix_base hwbus_base pen_hou_oth hwequ_base)
            egen hweal_check2 = rowtotal(hwfix_base pthru_base pen_hou_oth ccorw_base)
            
            if "`wlthgrp'" != "top0001" {
                assert inrange(hweal_check1 / hweal$preferred_defn_late, 0.99, 1.01) 
                assert inrange(hweal_check2 / hweal$preferred_defn_late, 0.99, 1.01) 
            }
            else { // These have to be censored for privacy reasons
               assert inrange(hweal_check1 / hweal$preferred_defn_late, 0.9875, 1.0275)  
               assert inrange(hweal_check2 / hweal$preferred_defn_late, 0.9875, 1.0275)  
            }
            keep hw*_base pthru_base ccorw_base pen_hou_oth

            rename *_base *

            xpose, clear varname
            rename (v1 _varname) (`wlthgrp'`units' wlthcat)

            gen datasource = "preferred"

            tempfile preferred`wlthgrp'`units'
            save `preferred`wlthgrp'`units''
        }
    }

        /***********************************************************************
             Prepare PSZ 18 capitalized totals for TUs and equal split 
                units
        ***********************************************************************/

    foreach units in es tu {
        foreach wlthgrp in top10 top1 {

            load_taxdata, units("`units'") rankspec($sz_wlth_defn_late_v3) year(2016)

            local w_group = cond("`wlthgrp'" == "top10", 2, 3)
            qui keep if w${sz_wlth_defn_late_v3}_group == `w_group'
            
            qui gen hwequ = 0.8 * ccorw
            qui gen pthru = scorw + partw_sz20_scaled + solepropw_sz20_scaled
            qui gen hwbus = pthru + (0.2 * ccorw)
            qui gen hwfix = taxbond + taxbond_mufmisc_sz + muni + currency
            
            capture drop hwhou 
            qui gen hwhou = rentalhome + ownerhome + ownermort + rentalmort

            qui gen pen_hou_oth = hwpen_ini + hwhou + nonmort_ini
            
            qui egen hweal_chk1 = rowtotal(hwfix hwbus pen_hou_oth hwequ)
            qui egen hweal_chk2 = rowtotal(hwfix pthru pen_hou_oth ccorw)

            #delimit ;
            qui egen hweal_chk3 = rsum(ccorw scorw taxbond taxbond_mufmisc_sz muni currency 
                                       rentalhome ownerhome partw_sz20_scaled
                                       solepropw_sz20_scaled
                                       hwpen_ini ownermort rentalmort nonmort_ini);
            #delimit cr

            if "`wlthgrp'" != "top0001" { 
                assert inrange(hweal_chk1 / hweal$sz_wlth_defn_late_v3, 0.99, 1.01)
                assert inrange(hweal_chk2 / hweal$sz_wlth_defn_late_v3, 0.99, 1.01)
                assert inrange(hweal_chk3 / hweal$sz_wlth_defn_late_v3, 0.99, 1.01)
            }
            else { // These have to be censored for privacy reasons
                assert inrange(hweal_chk1 / hweal$sz_wlth_defn_late_v3, 0.9875, 1.0275)
                assert inrange(hweal_chk2 / hweal$sz_wlth_defn_late_v3, 0.9875, 1.0275)
                assert inrange(hweal_chk3 / hweal$sz_wlth_defn_late_v3, 0.9875, 1.0275)
            }

            keep hwfix hwbus pthru pen_hou_oth hwequ ccorw

            xpose, clear varname
            rename (v1 _varname) (`wlthgrp'`units' wlthcat)

            gen datasource = "equreturns"

            tempfile equreturns`wlthgrp'`units'
            qui save `equreturns`wlthgrp'`units''
        }
    }

        /***********************************************************************
             Prepare SCF totals for TUs and equal split units
        ***********************************************************************/

    foreach units in es tu {

        use $inputs/scf_revision.dta, clear

        * QJE 3 edits to networth concept
        drop networth_pref
        gen networth_pref = networth + funded_pen_db - vehic - durables
        replace hwpen = hwpen - tot_pen_db + funded_pen_db
        replace hwoth = hwoth - veh_inst

        gen pen_hou_oth = hwpen + hwhou + hwoth + othdebt

        if "`units'" == "es" {
            es_rank_scf, rankvar(networth_pref) ///
                othersplitvars("hwequ hwfix hwbus pen_hou_oth ccorw pthru")
        }
        else {
            assert "`units'" == "tu"
            tu_rank_scf, rankvar(networth_pref)
        }

        tempfile orig 
        save `orig'

        foreach wlthgrp in top10 top1 {

            use `orig', clear

            local threshold = cond("`wlthgrp'" == "top10", 0.9, 0.99)
            keep if `units'_rank >= `threshold' & year == 2016

            egen networth_check1 = rowtotal(hwequ hwfix hwbus pen_hou_oth)
            egen networth_check2 = rowtotal(ccorw hwfix pthru pen_hou_oth)
            assert inrange(networth_pref / networth_check1, 0.999, 1.001) & ///
                inrange(networth_pref / networth_check2, 0.999, 1.001)

            collapse (sum) hwequ ccorw hwfix hwbus pthru pen_hou_oth [fw = wgt1B]

            assert _N == 1

            foreach billionX of varlist hw* ccorw pthru pen_hou_oth {
                replace `billionX' = `billionX' / 1E9
            }

            xpose, clear varname
            rename (v1 _varname) (`wlthgrp'`units' wlthcat)

            isid wlthcat

            gen datasource = "scf"

            tempfile scf`wlthgrp'`units'
            save `scf`wlthgrp'`units''
        }
    }

        /***********************************************************************
             Prepare Forbes totals
        ***********************************************************************/

    import delim forbesportfolios.csv, clear

    rename *_f400 * 
    drop hweal hwpen hwhou hwoth

    xpose, clear varname

    rename (v1 _varname) (forbes wlthcat)

    gen datasource = "scf"

    tempfile forbes
    save `forbes'

        /***********************************************************************
            Merge everything together
        ***********************************************************************/

    clear

    foreach datasource in scf equreturns preferred {
        append using ``datasource'top1tu'
        merge 1:1 datasource wlthcat using ``datasource'top1es', update assert(1 3 4) nogen
        merge 1:1 datasource wlthcat using ``datasource'top10tu', update assert(1 3 4) nogen
        merge 1:1 datasource wlthcat using ``datasource'top10es', update assert(1 3 4) nogen
    }

    merge 1:1 datasource wlthcat using `forbes', assert(1 3) nogen

    append using `dfa'

        /***********************************************************************
            Finagle in preparation to make graphs
        ***********************************************************************/

    * Scale into trillions so easier to read    
    ds wlthcat datasource, not
    foreach dollars of varlist `r(varlist)' {
        replace `dollars' = `dollars' / 1E12
    }

    #delimit ;
    gen catorder1 = cond(wlthcat == "hwfix", 1,
                    cond(wlthcat == "hwequ", 2,
                    cond(wlthcat == "hwbus", 3, 4)));
    
    gen catorder2 = cond(wlthcat == "hwfix", 1,
                    cond(wlthcat == "ccorw", 2,
                    cond(wlthcat == "pthru", 3, 4)));

    gen dataorder = cond(datasource == "equreturns", 1, 
                    cond(datasource == "preferred", 2, 
                    cond(datasource == "scf", 3, 4)));
    #delimit cr

    label define catorder1 1 "Fixed Income" 2 "Public Equity" 3 "Private Business" 4 "Pensions, Housing, and Other"
    label define catorder2 1 "Fixed Income" 2 "C-corporation Equity" 3 "Pass-throughs" 4 "Pensions, Housing, and Other"
    label values catorder1 catorder1
    label values catorder2 catorder2

        /***********************************************************************
            Cycling through top groups, output graphs
        ***********************************************************************/

    foreach grp in top1 top10 {

            /*******************************************************************
                Equal split graphs
            *******************************************************************/

        gen `grp'_equreturns = `grp'es if datasource == "equreturns"
        gen `grp'_preferred = `grp'es if datasource == "preferred"
        gen `grp'_scf = `grp'es if datasource == "scf"

        #delimit ;
        graph bar `grp'_equreturns `grp'_preferred `grp'_scf forbes
            , 
            $gpr
            over(datasource, lab(nolab) gap(0.25)) stack 
            over(catorder2, label(labsize(small)))
            blabel(bar, position(center) color(white) format(%9.1fc) size(vsmall))
            bar(1, color("$u3"))
            bar(2, color("$u1"))
            bar(3, color("$p2"))
            bar(4, color("$p2") lcolor("$u4") lw(medthick))
            ytitle("Wealth (Trillions)")
            legend(order(1 "Equal" 2 "Baseline" 3 "SCF" 4 "Forbes")
                region(lcolor(white) margin(tiny)) row(1)) xsize(6);
        #delimit cr
        graph export `grp'es_portfoliototals_ccorw_pthru.pdf, replace
        graph display, xsize(7.5)
        graph export `grp'es_portfoliototals_ccorw_pthru_slides.pdf, replace

            /*******************************************************************
                Tax unit graphs, with DFA
            *******************************************************************/

        colorpalette "$p2", intensity(0.1(.05)1)
        local p2l "`r(p6)'"

        replace `grp'_equreturns = `grp'tu if datasource == "equreturns"
        replace `grp'_preferred = `grp'tu if datasource == "preferred"
        replace `grp'_scf = `grp'tu if datasource == "scf"     
        capture gen `grp'_dfa = `grp'tu if datasource == "dfa"

        #delimit ;
        graph bar `grp'_equreturns `grp'_preferred `grp'_scf forbes `grp'_dfa
            , 
            $gpr
            over(dataorder, lab(nolab) gap(0.25)) stack 
            over(catorder2, label(labsize(small)))
            blabel(bar, position(center) color(white) format(%9.1fc) size(vsmall))
            bar(1, color("$u3"))
            bar(2, color("$u1"))
            bar(3, color("$p2"))
            bar(4, color("$p2") lcolor("$u4") lw(medthick))
            bar(5, color("`p2l'") lcolor("$f4") lw(medthick))
            ytitle("Wealth (Trillions)")
            legend(order(1 "Equal" 2 "Baseline" 3 "SCF" 4 "Forbes" 5 "DFA")
                region(lcolor(white) margin(tiny)) col(3)) xsize(6);
        #delimit cr
        graph export `grp'tu_portfoliototals_ccorw_pthru.pdf, replace
        graph display, xsize(7.5)
        graph export `grp'tu_portfoliototals_ccorw_pthru_slides.pdf, replace
    }

end/*%>*/

capture program drop wealthcompovertime_appx/*%<*/
program define wealthcompovertime_appx

/*******************************************************************************
    Appendix Figures 11 and 12
    (C) Portfolio Composition Over Time in Shares of Total Wealth and in Levels
        (Appendix version of figure 14)

    Input:  wealthcompovertime.csv (read from the working directory).
    Output: for every wealth component and top group, two PDFs:
                <component>_tshares_<group>.pdf  (share series, variables
                    named <component>_tsh_<source>_<group>)
                <component>_total_<group>.pdf    (level series, variables
                    named <component>_<source>_<group>)
            The top1 graphs plot the preferred, psz18, scf and dfa series;
            the top01 and top001 graphs plot preferred, psz18 and scf only.
    Relies on the globals gpr, u1, u3, u4 and p2 being defined by the caller.
*******************************************************************************/

    insheet using wealthcompovertime.csv, clear

    * Connected lines are drawn in data order, so sort by year first
    sort year
    foreach sharevar in hwfix hwequ ccorw hwbus pthru hwpen hwhou hwoth pen_hou_oth {
    
        * Human-readable component name used in the y-axis titles; anything 
        * not listed explicitly (here pen_hou_oth) gets the combined label
        #delimit ;
        local cmptname = cond("`sharevar'" == "hwfix", "Fixed Income", 
                         cond("`sharevar'" == "hwequ", "Public Equity", 
                         cond("`sharevar'" == "ccorw", "C-corporation Equity", 
                         cond("`sharevar'" == "hwbus", "Private Business", 
                         cond("`sharevar'" == "pthru", "Pass-through Business", 
                         cond("`sharevar'" == "hwhou", "Housing", 
                         cond("`sharevar'" == "hwpen", "Pensions",
                         cond("`sharevar'" == "hwoth", "Other", "Pensions, Housing, and Other"))))))));
        #delimit cr

            /*******************************************************************
                Top 1%: includes the DFA series
            *******************************************************************/

        foreach dfagrp in top1 {
            local grplab = "Top 1%"
            
            #delimit ;
            twoway (connect `sharevar'_tsh_preferred_`dfagrp' year, ms(o) color("$u1") lwidth(medthick))
                (connect `sharevar'_tsh_psz18_`dfagrp' year, ms(s) color("$u3") lpattern(-))
                (connect `sharevar'_tsh_scf_`dfagrp' year, ms(t) color("$p2") lwidth(thin))
                (connect `sharevar'_tsh_dfa_`dfagrp' year, ms(d) lc("$u4") mc("$u4") lpattern("-."))

                ,
                $gpr 
                xlab(1960(20)2020)
                yscale(range(0 14.5)) ylab(0(2)14)
                ytitle("`grplab' `cmptname' Sh Total Wealth (%)") xtitle("")
                legend(order(2 "PSZ 2018" 1 "Preferred" 3 "SCF + F400" 4 "DFA") 
                        size(medlarge) region(lcolor(white) margin(tiny)) row(1))

                xsize(6.45);
            graph export `sharevar'_tshares_`dfagrp'.pdf, replace;

            twoway (connect `sharevar'_preferred_`dfagrp' year, ms(o) color("$u1") lwidth(medthick))
                (connect `sharevar'_psz18_`dfagrp' year, ms(s) color("$u3") lpattern(-))
                (connect `sharevar'_scf_`dfagrp' year, ms(t) color("$p2") lwidth(thin))
                (connect `sharevar'_dfa_`dfagrp' year, ms(d) lc("$u4") mc("$u4") lpattern("-."))

                ,
                $gpr 
                xlab(1960(20)2020)
                yscale(range(0 14.5)) ylab(0(2)14)
                ytitle("`grplab' `cmptname' (trillions)") xtitle("")
                legend(order(2 "PSZ 2018" 1 "Preferred" 3 "SCF + F400" 4 "DFA") 
                        size(medlarge) region(lcolor(white) margin(tiny)) row(1))

                xsize(6.45);
            graph export `sharevar'_total_`dfagrp'.pdf, replace;
            #delimit cr
        }

            /*******************************************************************
                Narrower top groups: no DFA series
            *******************************************************************/

        foreach grp in top01 top001 {
            
            local grplab = cond("`grp'" == "top01", "Top 0.1%", ///
                           cond("`grp'" == "top001", "Top 0.01%", "Top 0.001%"))

            * Only the top0001 group gets the later start year and axis start.
            * The test previously compared against t0001, a name no group 
            * uses, so it could never fire even if top0001 were added to the 
            * loop above.
            local startyr = cond("`grp'" == "top0001", 1989, 1966)
            local labstart = cond("`grp'" == "top0001", 1990, 1960)

            #delimit ;
            twoway (connect `sharevar'_tsh_preferred_`grp' year, ms(o) color("$u1") lwidth(medthick))
                (connect `sharevar'_tsh_psz18_`grp' year, ms(s) color("$u3") lpattern(-))
                (connect `sharevar'_tsh_scf_`grp' year, ms(t) color("$p2") lwidth(thin))
                if inrange(year, `startyr', 2020)
                ,
                $gpr 
                xlab(`labstart'(20)2020)
                yscale(range(0 14.5)) ylab(0(2)14)
                ytitle("`grplab' `cmptname' Sh Total Wealth (%)") xtitle("")
                legend(order(2 "PSZ 2018" 1 "Preferred" 3 "SCF + F400") 
                        size(large) region(lcolor(white) margin(tiny)) row(1))

                xsize(6.45);
            graph export `sharevar'_tshares_`grp'.pdf, replace;

            twoway (connect `sharevar'_preferred_`grp' year, ms(o) color("$u1") lwidth(medthick))
                (connect `sharevar'_psz18_`grp' year, ms(s) color("$u3") lpattern(-))
                (connect `sharevar'_scf_`grp' year, ms(t) color("$p2") lwidth(thin))
                if inrange(year, `startyr', 2020)
                ,
                $gpr 
                xlab(`labstart'(20)2020)
                yscale(range(0 14.5)) ylab(0(2)14)
                ytitle("`grplab' `cmptname' (trillions)") xtitle("")
                legend(order(2 "PSZ 2018" 1 "Preferred" 3 "SCF + F400") 
                        size(large) region(lcolor(white) margin(tiny)) row(1))

                xsize(6.45);
            graph export `sharevar'_total_`grp'.pdf, replace;
            #delimit cr
        }
    }
end/*%>*/

capture program drop scfadj/*%<*/
program define scfadj

    /***************************************************************************
        Appendix Figure 1
        (ASCF) Cycling through ranking adjustment and modification specs, make 
            top shares tempfiles.

        Adjustment numbering (matches the x-axis labels of the waterfalls):
            TU: 0 raw, 1 TU ranks, 2 + DB wealth, 3 + excl. durables, 4 + F400
            ES: 0 raw, 1 split couples, 2 ES scaling, 3 + DB wealth, 
                4 + excl. durables, 5 + F400

        Inputs:  $inputs/scf_revision.dta, $inputs/forbeswlth.dta, and the 
                 preferred tax data series via load_preferred_taxdata.
        Outputs: progressive_adj_scf_es.csv and progressive_adj_scf_tu.csv,
                 progressive_adj_scf_<tu|es>_<top1|top01>.pdf (waterfalls),
                 simplifiedadj_scf_<top1|top01>.pdf (time series).
        Relies on es_rank_scf, tu_rank_scf and load_preferred_taxdata, and on 
        the graph/color globals (gpr, u1, u3, u5, p2, f1, f3), being defined 
        elsewhere.
    ***************************************************************************/
    
    foreach rankadj in es tu {

        local numadj = cond("`rankadj'" == "es", 5, 4)
        * First adjustment that changes the net worth concept rather than the
        * ranking unit (the "+ DB wlth" step)
        local firstnonranking = `numadj' - 2

        forv adjnum = 0 / `numadj' {
        
        /***********************************************************************
             Load in SCF microfile and modify net worth concept as  
                required according to direction in question. Create ranking  
                variable based on this net worth concept.
        ***********************************************************************/
        
            use $inputs/scf_revision.dta, clear

            if `adjnum' == `firstnonranking' {
                qui replace networth = networth + tot_pen_db
            }
            else if `adjnum' > `firstnonranking' {
                
                * Switch to the preferred concept, after verifying that it 
                * equals net worth plus DB pensions less net vehicles and 
                * durables
                qui gen check = networth + tot_pen_db - (vehic - veh_inst) - durables
                qui replace networth = networth_pref
                
                assert inrange(networth / check, 0.999, 1.001) | abs(check - networth) < 1
                drop check
            }
            
            /* Only split married couples in two; no rescaling to match tax data 
                counts */
            if "`rankadj'" == "es" & `adjnum' == 1 {
                qui expand 2 if married == 1

                qui replace networth = networth / 2 if married == 1

                qui cumul networth [aw = wgt], by(year) gen(wlthrank)
            }
            else if `adjnum' > 0 { // Full adjustment with rescaling
                `rankadj'_rank_scf, rankvar(networth) outname(wlthrank)
            }
            else { // Zero is baseline
                qui cumul networth [aw = wgt], by(year) gen(wlthrank)
            }
            
        /***********************************************************************
            Weight net worth measure in preparation for collapse, 
               then make collapses by year for top 1 and 0.1% of 
               TU-adjusted raw net worth distribution.
        ***********************************************************************/
        
            /* Group 1 = bottom 99%, Group 2 = P99-99.9, Group 3 = P99.9-99.99, 
                Group 4 = top 0.01% */
            #delimit ;
            qui gen group = cond(wlthrank < 0.99, 1, 
                            cond(wlthrank < 0.999, 2, 
                            cond(wlthrank < 0.9999, 3, 4)));
            #delimit cr

            collapse (sum) networth [fw = wgt1B], by(year group)
            
            qui replace networth = networth / 1E9 // Undo weighting

            qui reshape wide networth, i(year) j(group)
            
            egen total_wealth = rowtotal(networth?)

            * Build nested top-group totals from the disjoint groups
            rename networth4 networth_top001
            gen networth_top01 = networth3 + networth_top001
            gen networth_top1 = networth2 + networth_top01
            drop networth?
            
            * Final adjustment: add Forbes wealth to the total and to every 
            * top group
            if `adjnum' == `numadj' {
                merge 1:1 year using $inputs/forbeswlth.dta, assert(3) nogen

                foreach addforbes of varlist total_wealth networth_top* {
                    replace `addforbes' = `addforbes' + forbeswlth
                }
                drop forbes*
            }        

            foreach topgrp in top1 top01 {
                gen sh_adj`adjnum'_`topgrp' = (networth_`topgrp' / total_wealth) * 100
            }

            sort year 

            tempfile adj`adjnum'`rankadj'
            qui save `adj`adjnum'`rankadj''
        }

    /***************************************************************************
        Merge together
    ***************************************************************************/

        use `adj0`rankadj'', clear

        forv adj = 1 / `numadj' {
            qui merge 1:1 year using `adj`adj'`rankadj'', assert(3) nogen
        }

        export delim progressive_adj_scf_`rankadj'.csv, replace
    }

    /***************************************************************************
        Reformat a bit (requires quite a bit of finagling actually) and
            make waterfall graph
    ***************************************************************************/

        /***********************************************************************
             TU (four adjustments)
        ***********************************************************************/

    import delim progressive_adj_scf_tu.csv, clear 

    keep if year == 2016

    reshape long sh_adj0 sh_adj1 sh_adj2 sh_adj3 sh_adj4, i(year) j(topgrp, string)

    replace topgrp = subinstr(topgrp, "_", "", .)
    drop year

    forv adjnum = 1 / 4 {
        local prevadj = `adjnum' - 1 
        gen diff_adj`adjnum' = round(sh_adj`adjnum' - sh_adj`prevadj', 0.01)
    }

    reshape long sh_adj diff_adj, i(topgrp) j(adjnum)

    * Label height: midpoint between this adjustment and the previous one.
    * Sort explicitly rather than relying on the order left by reshape.
    bysort topgrp (adjnum): gen diffy = (sh_adj + sh_adj[_n - 1]) / 2

    * Each step of the waterfall needs the previous level repeated at the 
    * next x position. Tag the duplicates explicitly: the copy and the 
    * original tie on (topgrp, adjnum), so telling them apart by _n within 
    * that group would depend on how the sort happens to order ties.
    expand 2 if adjnum != 4, generate(iscopy)

    replace adjnum = adjnum + 1 if iscopy
    * alt_adjnum groups an original with its shifted copy (horizontal 
    * connector); adjnum groups a copy with the next original (vertical bar)
    gen alt_adjnum = cond(iscopy, adjnum - 1, adjnum)
    * Only originals carry a difference label
    replace diff_adj = . if iscopy
    gsort topgrp adjnum -iscopy

    * Format differences as signed strings with a leading zero and at most 
    * two decimals; missing values become empty labels
    tostring diff_adj, replace force
    replace diff_adj = subinstr(diff_adj, ".", "0.", 1) if abs(real(diff_adj)) < 1
    replace diff_adj = substr(diff_adj, 1, strpos(diff_adj, ".") + 2)
    replace diff_adj = "+" + diff_adj if real(diff_adj) > 0 & diff_adj != "."
    replace diff_adj = "" if diff_adj == "."

    foreach topgrp in top1 top01 {

        local ylab = cond("`topgrp'" == "top1", "32(4)44", "12(1)17")
        * top1 -> 1, top01 -> 0.1
        local grplab = substr(subinstr("`topgrp'", "p0", "p0.", 1), 4, .)

        #delimit ;
        twoway (scatter sh_adj adjnum if topgrp == "`topgrp'" & adjnum == 0, col("$u1") ms(O))
            (connect sh_adj adjnum if topgrp == "`topgrp'" & alt_adjnum == 0,
                ms(none) lcol(gs5) lp(-))
            (connect sh_adj adjnum if topgrp == "`topgrp'" & adjnum == 1, col("$u3") ms(O))
            (connect sh_adj adjnum if topgrp == "`topgrp'" & alt_adjnum == 1,
                ms(none) lcol(gs5) lp(-))
            (connect sh_adj adjnum if topgrp == "`topgrp'" & adjnum == 2, col("$p2") ms(O))
            (connect sh_adj adjnum if topgrp == "`topgrp'" & alt_adjnum == 2,
                ms(none) lcol(gs5) lp(-))
            (connect sh_adj adjnum if topgrp == "`topgrp'" & adjnum == 3, col("$f1") ms(O))
            (connect sh_adj adjnum if topgrp == "`topgrp'" & alt_adjnum == 3,
                ms(none) lcol(gs5) lp(-))
            (connect sh_adj adjnum if topgrp == "`topgrp'" & adjnum == 4, col("$f3") ms(O))
            (connect sh_adj adjnum if topgrp == "`topgrp'" & alt_adjnum == 4,
                ms(none) lc(gs5) lp(-))
            (scatter diffy adjnum if topgrp == "`topgrp'", ms(none) mlab(diff_adj)
                mlabpos(9) mlabcolor(blk))
            ,
            $gpr
            xtitle("Adjustments") 
            ytitle("2016 top `grplab'% share of total net worth")
            xscale(range(-0.25 4.25))
            ylab(`ylab') 
            xlab(0 "Raw" 1 "+ TU ranks" 2 "+ DB wlth" 3 "+ excl. durables" 4 "+ F400")
            legend(off)
            xsize(6);
        #delimit cr
        graph export progressive_adj_scf_tu_`topgrp'.pdf, replace
    }

        /***********************************************************************
            ES (five adjustments)
        ***********************************************************************/

    import delim progressive_adj_scf_es.csv, clear 

    keep if year == 2016

    reshape long sh_adj0 sh_adj1 sh_adj2 sh_adj3 sh_adj4 sh_adj5, i(year) j(topgrp, string)

    replace topgrp = subinstr(topgrp, "_", "", .)
    drop year

    forv adjnum = 1 / 5 {
        local prevadj = `adjnum' - 1 
        gen diff_adj`adjnum' = round(sh_adj`adjnum' - sh_adj`prevadj', 0.01)
    }

    reshape long sh_adj diff_adj, i(topgrp) j(adjnum)

    * Label height: midpoint between this adjustment and the previous one
    bysort topgrp (adjnum): gen diffy = (sh_adj + sh_adj[_n - 1]) / 2

    * Same explicit copy tagging as in the TU section
    expand 2 if adjnum != 5, generate(iscopy)

    replace adjnum = adjnum + 1 if iscopy
    gen alt_adjnum = cond(iscopy, adjnum - 1, adjnum)
    replace diff_adj = . if iscopy
    gsort topgrp adjnum -iscopy

    tostring diff_adj, replace force
    replace diff_adj = subinstr(diff_adj, ".", "0.", 1) if abs(real(diff_adj)) < 1
    replace diff_adj = substr(diff_adj, 1, strpos(diff_adj, ".") + 2)
    replace diff_adj = "+" + diff_adj if real(diff_adj) > 0 & diff_adj != "."
    replace diff_adj = "" if diff_adj == "."

    foreach topgrp in top1 top01 {

        local ylab = cond("`topgrp'" == "top1", "32(4)44", "12(1)17")
        local grplab = substr(subinstr("`topgrp'", "p0", "p0.", 1), 4, .)

        #delimit ;
        twoway (scatter sh_adj adjnum if topgrp == "`topgrp'" & adjnum == 0, col("$u1") ms(O))
            (connect sh_adj adjnum if topgrp == "`topgrp'" & alt_adjnum == 0,
                ms(none) lcol(gs5) lp(-))
            (connect sh_adj adjnum if topgrp == "`topgrp'" & adjnum == 1, col("$u5") ms(O))
            (connect sh_adj adjnum if topgrp == "`topgrp'" & alt_adjnum == 1,
                ms(none) lcol(gs5) lp(-))
            (connect sh_adj adjnum if topgrp == "`topgrp'" & adjnum == 2, col("$u3") ms(O))
            (connect sh_adj adjnum if topgrp == "`topgrp'" & alt_adjnum == 2,
                ms(none) lcol(gs5) lp(-))
            (connect sh_adj adjnum if topgrp == "`topgrp'" & adjnum == 3, col("$p2") ms(O))
            (connect sh_adj adjnum if topgrp == "`topgrp'" & alt_adjnum == 3,
                ms(none) lcol(gs5) lp(-))
            (connect sh_adj adjnum if topgrp == "`topgrp'" & adjnum == 4, col("$f1") ms(O))
            (connect sh_adj adjnum if topgrp == "`topgrp'" & alt_adjnum == 4,
                ms(none) lcol(gs5) lp(-))
            (connect sh_adj adjnum if topgrp == "`topgrp'" & adjnum == 5, col("$f3") ms(O))
            (connect sh_adj adjnum if topgrp == "`topgrp'" & alt_adjnum == 5,
                ms(none) lcol(gs5) lp(-))
            (scatter diffy adjnum if topgrp == "`topgrp'", ms(none) mlab(diff_adj)
                mlabpos(9) mlabcolor(blk))
            ,
            $gpr
            xlab(0 "Raw SCF" 1 "+ split couples" 2 "+ ES scaling" 3 "+ DB wlth" 
                 4 "+ excl. durables" 5 "+ F400") 
            xscale(range(-0.25 5.25))
            xtit("Adjustments") ytit("2016 top `grplab'% share of total net worth")
            ylab(`ylab') legend(off) 
            xsize(6);
        #delimit cr 
        graph export progressive_adj_scf_es_`topgrp'.pdf, replace
    }

    /***************************************************************************
        Make a simplified time series graph
    ***************************************************************************/

        /***********************************************************************
            Make time series of wealth sh using networth + DB wealth
        ***********************************************************************/

    use $inputs/scf_revision.dta, clear

    gen networth_db = networth + tot_pen_db

    cumul networth_db [aw = wgt], by(year) gen(db_rank)

    /* Group 1 = bottom 99%, Group 2 = P99-99.9, Group 3 = P99.9-99.99, 
        Group 4 = top 0.01% */
    #delimit ;
    qui gen group = cond(db_rank < 0.99, 1, 
                    cond(db_rank < 0.999, 2, 
                    cond(db_rank < 0.9999, 3, 4)));
    #delimit cr

    collapse (sum) networth = networth_db [fw = wgt1B], by(year group)

    qui replace networth = networth / 1E9 // Undo weighting

    qui reshape wide networth, i(year) j(group)

    egen total_wealth = rowtotal(networth?)

    rename networth4 networth_top001
    gen networth_top01 = networth3 + networth_top001
    gen networth_top1 = networth2 + networth_top01
    drop networth?

    foreach topgrp in top1 top01 top001 {
        gen sh_adjdb_`topgrp' = (networth_`topgrp' / total_wealth) * 100
    }

    tempfile adjdb 
    save `adjdb'

        /***********************************************************************
            Make time series of wealth shares using networth 
                including Forbes 400 wealth at top 
        ***********************************************************************/

    use $inputs/scf_revision.dta, clear

    cumul networth [aw = wgt], by(year) gen(rank)

    /* Group 1 = bottom 99%, Group 2 = P99-99.9, Group 3 = P99.9-99.99, 
        Group 4 = top 0.01% */
    #delimit ;
    qui gen group = cond(rank < 0.99, 1, 
                    cond(rank < 0.999, 2, 
                    cond(rank < 0.9999, 3, 4)));
    #delimit cr

    collapse (sum) networth [fw = wgt1B], by(year group)

    qui replace networth = networth / 1E9 // Undo weighting

    qui reshape wide networth, i(year) j(group)

    merge 1:1 year using $inputs/forbeswlth.dta, assert(3) nogen

    * Forbes wealth enters both the total and the top groups
    egen total_wealth = rowtotal(networth? forbeswlth)

    gen networth_top001 = networth4 + forbeswlth
    gen networth_top01 = networth3 + networth_top001
    gen networth_top1 = networth2 + networth_top01
    drop networth?

    foreach topgrp in top1 top01 top001 {
        gen sh_adjf400_`topgrp' = (networth_`topgrp' / total_wealth) * 100
    }

    tempfile adjf400
    save `adjf400'

        /***********************************************************************
             Import preferred tax data series
        ***********************************************************************/

    foreach units in es tu {

        load_preferred_taxdata, units("`units'") earlyspec($preferred_defn_early) ///
            midspec($preferred_defn_mid) latespec($preferred_defn_late)

        keep if inlist(group, "All", "P99-100", "P99.9-100")
        keep year group hweal_preferred

        * Recode group names into variable-name suffixes for the reshape
        replace group = cond(group == "All", "_total", cond(group == "P99-100", "_top1", "_top01"))

        reshape wide hweal_preferred, i(year) j(group, string)

        foreach topgrp in top1 top01 {
            gen sh_taxdata`units'_`topgrp' = (hweal_preferred_`topgrp' / hweal_preferred_total) * 100 
        }
        drop hweal_preferred*

        tempfile taxdata_`units'
        save `taxdata_`units''
    }

        /***********************************************************************
             Import baseline, TU-adjusted only, ES-adjusted only, and 
                final concentration graphs from collapses for progressive 
                adjustment subfigures and  merge on collapses created earlier.
        ***********************************************************************/

    foreach rankadj in es tu {
        local numadj = cond("`rankadj'" == "es", 5, 4)

        import delim progressive_adj_scf_`rankadj'.csv, clear

        * Keep the raw series, the first adjustment and the final adjustment
        keep year sh_adj0* sh_adj1* sh_adj`numadj'*

        rename (sh_adj0* sh_adj1* sh_adj`numadj'*) (sh_baseline* sh_adj`rankadj'* sh_adjfinal`rankadj'*)

        tempfile csvshares`rankadj'
        save `csvshares`rankadj''
    }

    use `csvsharestu', clear

    merge 1:1 year using `csvshareses', assert(3) nogen
    merge 1:1 year using `adjdb', assert(3) nogen
    merge 1:1 year using `adjf400', assert(3) nogen
    * Tax data years need not line up with the SCF years, so no assert here
    merge 1:1 year using `taxdata_es', nogen
    merge 1:1 year using `taxdata_tu', nogen

    drop if year < 1989
    sort year

        /***********************************************************************
             Make graphs
        ***********************************************************************/

    foreach topgrp in top1 top01 {

        local grplab = substr(subinstr("`topgrp'", "p0", "p0.", 1), 4, .)

        sort year
        #delimit ;
        twoway (connect sh_baseline_`topgrp' year, ms(o) color("$p2") lw(medthick))
            (connect sh_adjdb_`topgrp' year, ms(d) color(gs8) lw(medthin) lp("-"))
            (connect sh_adjtu_`topgrp' year, ms(s) color(gs8) lw(medthin) lp("-"))
            (connect sh_adjes_`topgrp' year, ms(+) color(gs8) lw(medthin) lp("-"))
            (connect sh_adjf400_`topgrp' year, ms(x) color(gs8) lw(medthin) lp("-"))
            (connect sh_adjfinaltu_`topgrp' year, ms(Th) msize(large) color("$p2") lw(thick) lp(_))
            (connect sh_adjfinales_`topgrp' year, ms(T) msize(large) color("$p2") lw(thick))
            (connect sh_taxdataes_`topgrp' year, ms(O) msize(large) color("$u1") lw(thick))
            (connect sh_taxdatatu_`topgrp' year, ms(Oh) msize(large) color("$u1") lw(thick) lp(_))
            ,
            $gpr 
            ytitle("Top `grplab'% share of total net worth")
            xlab(1989(3)2019) xscale(range(1988 2020))
            legend(order(1 "Raw SCF" 2 "Incl. DB" 5 "Incl. F400" 3 "TU adj." 
                         4 "ES adj." 7 "All adj. ES" 6 "All adj. TU" 
                         8 "Baseline ES" 9 "Baseline TU") 
                row(2) region(lc(white) margin(tiny)) size(small))
             xsize(8);
        #delimit cr
        graph export simplifiedadj_scf_`topgrp'.pdf, replace
    }
end/*%>*/

capture program drop validate_housing/*%<*/
program define validate_housing
/*******************************************************************************
    Appendix Figure 31
    (VH) Validating Housing Capitalization Approach

    Panel A plots the yearly national housing aggregate taken from the housing
    factors file next to the Financial Accounts aggregate built from the
    parameters file. Panel B plots ACS state property tax rates against the
    rates implied by the housing factors for 1990, 2000 and 2010.

    Takes no arguments. Exports scatter_compare_housing_aggregates.pdf and
    scatter_compare_proptaxrates.pdf.
*******************************************************************************/

    /***************************************************************************
         Panel A: Housing Assets Values Match Financial Accounts
    ***************************************************************************/

    * The housing factors file is unique by year and state. Keep one national
    * aggregate per year (first nonmissing agg_reval) for years through 2016.
    use $inputs/housing_factors_ds.dta, clear
    isid year state
    keep if year <= 2016
    collapse (firstnm) gross_hou_cl = agg_reval, by(year)
    replace gross_hou_cl = gross_hou_cl / 1E6

    tempfile cl_by_year
    save `cl_by_year'

    * Financial Accounts gross housing from the parameters file. ttrentw is net
    * housing and rental mortgages are asserted to be negative, so subtracting
    * ttrentmortw puts them back in. The 1E6 divisor mirrors the scaling above.
    load_analysis_data parameters_new
    drop if missing(ttrentmortw)
    assert ttrentmortw < 0
    generate gross_hou_usfa = (ttrestw + ttrentw - ttrentmortw) / 1E6
    keep if year <= 2016
    keep year gross_hou_usfa

    * Every year in the aggregate from the factors file must also be present
    * in the parameters file; only years found in both are plotted.
    merge 1:1 year using `cl_by_year', assert(master match) keep(match) nogenerate

    sort year
    twoway (connected gross_hou_cl year, msymbol(o) color("$u1"))              ///
           (connected gross_hou_usfa year, msymbol(o) color("$u3")),           ///
        $gpr                                                                    ///
        xtitle(" ") ytitle("Aggregate value (Trillions USD)")                  ///
        xlabel(1975(5)2015) xscale(range(1975 2016))                           ///
        legend(order(1 "CoreLogic Assessor and House Price Index"              ///
                     2 "US Financial Accounts Aggregates (Owner + Rental)")    ///
               rows(2) region(lcolor(white) margin(tiny)))
    graph export scatter_compare_housing_aggregates.pdf, replace

    /***************************************************************************
        Panel B: State Property Tax Rates Match the American Community
            Survey
    ***************************************************************************/

    * Years used for the comparison with the ACS file
    local acs_years 1990, 2000, 2010

    * Crosswalk from state abbreviation to FIPS code, with the FIPS variable
    * renamed to the key used in the ACS merge further down.
    use $inputs/irscode_fips_xwalk.dta, clear
    rename fips statefip

    tempfile fips_xwalk
    save `fips_xwalk'

    * Preferred rates: 100 divided by the housing factor, for the comparison
    * years only. DC is removed because it is not in the ACS data. The
    * crosswalk supplies statefip; every remaining state must be found in it.
    use $inputs/housing_factors_ds.dta, clear
    rename state stateabbrev
    keep if stateabbrev != "DC"
    keep if inlist(year, `acs_years')
    generate proptax_rate_pref = 100 / factor

    merge m:1 stateabbrev using `fips_xwalk', assert(using match) keep(match) ///
        keepusing(statefip) nogenerate

    tempfile pref_rates
    save `pref_rates'

    * ACS rates scaled up by 100, then matched one-to-one on year and state.
    * The merge asserts that every row matches on both sides.
    use "$inputs/acsproptax/proptax_rate_idx.dta", clear
    keep if inlist(year, `acs_years')
    generate proptax_rate_acs = mean_prop * 100

    merge 1:1 year statefip using `pref_rates', ///
        assert(match) nogenerate keepusing(proptax_rate_pref stateabbrev)

    keep year stateabbrev proptax_rate*

    * Marker labels of the form state abbreviation, year; one colour per year.
    * The function layer draws the 45-degree line.
    generate mlab = stateabbrev + ", " + string(year)

    twoway (scatter proptax_rate_acs proptax_rate_pref if year == 1990,        ///
                msymbol(none) mlabel(mlab) mlabposition(0) mlabcolor("$u1"))   ///
           (scatter proptax_rate_acs proptax_rate_pref if year == 2000,        ///
                msymbol(none) mlabel(mlab) mlabposition(0) mlabcolor("$u3"))   ///
           (scatter proptax_rate_acs proptax_rate_pref if year == 2010,        ///
                msymbol(none) mlabel(mlab) mlabposition(0) mlabcolor("$p2"))   ///
           (function y=x,  range(0.3 2.5) lcolor(black) lpattern(-)),          ///
        $gpr xtitle("Preferred property tax rates")                            ///
        ytitle("ACS property tax rates") legend(off)
    graph export scatter_compare_proptaxrates.pdf, replace
end/*%>*/

capture program drop top01wealthmortsensitivity/*%<*/
program define top01wealthmortsensitivity
/*******************************************************************************
    Appendix Figure 34
    (S) Sensitivity of Age Group Wealth Shares to Mortality Rates in the Top
        0.1%

    Reads fig_a17v2.csv, rescales the two top 0.1% series by 100, and plots
    them against max_age. Takes no arguments; exports
    estate_topsh_mortality_sensitivity.pdf.
*******************************************************************************/

    insheet using $inputs/MGS_emails/20200422/fig_a17v2.csv, clear

    * Only the age variable and the top 0.1% series are needed
    keep max_age *_top01

    * Multiply both plotted series by 100 for the percentage axis
    foreach pctvar of varlist share_top01 sensitivity_top01 {
        replace `pctvar' = `pctvar' * 100
    }

    sort max_age

    twoway (connected share_top01 max_age, msymbol(o) color("$u1"))            ///
           (connected sensitivity_top01 max_age, msymbol(s) color("$u3")),     ///
        $gpr                                                                    ///
        ytitle("Percentage points (%)") xtitle("")                             ///
        xscale(range(5 110)) xlabel(10(10)110)                                 ///
        legend(order(1 "Wealth share of top 0.1% age sub-group"                ///
                     2 "Change in wealth share from increasing mortality rate 0.1pp") ///
               rows(2) region(lcolor(white) margin(tiny)))
    graph export estate_topsh_mortality_sensitivity.pdf, replace
end/*%>*/

capture program drop estate_top01shares/*%<*/
program define estate_top01shares
/*******************************************************************************
    Appendix Figure 33
    (ET) Wealth shares in estate tax data with different approaches

    Combines four inputs by year and plots six top 0.1% wealth share series:
      - fig_a18v2.csv: KS, SZ and preferred (m5l2i1) estate tax series
      - bpea_estate_mult_eyeballed_ds.csv: SZ 2019 BPEA estate multiplier series
      - top01shares_unadjusted.csv: top01_pref and top01_ks
      - topshares_adjusted.csv: top01_equrtrns_v3
    Takes no arguments; exports estate_top01shares.pdf.
*******************************************************************************/

    /**************************************************************************
        Load in Matt estate tax series from 4/10/2020 e-mail. Keep only 
            KS, SZ, and our preferred estate tax series. Rescale shares so 
            that they're in percentage point terms. Save as tempfile.
    **************************************************************************/    

    insheet using $inputs/fig_a18v2.csv, clear

    keep year ws_ks_top01 ws_sz_top01 ws_m5l2i1_top01
    rename ws_*_top01 top01wlthsh_*
    * m5l2i1 is relabelled szz (plotted as the preferred estate tax series);
    * the KS series gets a _matt suffix so it cannot collide with the KS
    * series that arrives later from top01shares_unadjusted.csv.
    rename (top01wlthsh_m5l2i1 top01wlthsh_ks) (top01wlthsh_szz top01wlthsh_ks_matt)

    foreach sharevar of varlist top01wlthsh* {
        replace `sharevar' = `sharevar' * 100
    }

    sort year

    tempfile estate
    save `estate'

    /**************************************************************************
         Import estate tax multiplier series from SZ 2019 BPEA figure 2A
    **************************************************************************/ 

    import delim using $inputs/bpea_estate_mult_eyeballed_ds.csv, clear

    * Rename whatever two columns the file has, in file order, to
    * year and top01sh_estatemult_szbpea.
    qui ds
    rename (`r(varlist)') (year top01sh_estatemult_szbpea)

    tempfile szbpea
    save `szbpea'

    /**************************************************************************
        Load preferred shares from figure 1A collapse. Keep for years 
            1980-2016
    **************************************************************************/ 

    insheet using top01shares_unadjusted.csv, clear

    keep year top01_pref top01_ks 

    rename top01_* top01wlthsh_*

    keep if inrange(year, 1980, 2016)

    tempfile fig1a
    save `fig1a'

    * Equal-returns series from the adjusted shares file, same year window
    insheet using topshares_adjusted.csv, clear

    keep year top01_equrtrns_v3

    rename top01_* top01wlthsh_*

    keep if inrange(year, 1980, 2016)

    tempfile fig1b
    save `fig1b'

    /**************************************************************************
        Merge data together and make graph
    **************************************************************************/ 

    * The equal-returns file is the master. The two unadjusted/adjusted share
    * files must match it exactly; the BPEA and estate files may cover fewer
    * years but may not add years.
    merge 1:1 year using `szbpea', assert(1 3) nogen
    merge 1:1 year using `fig1a', assert(3) nogen
    merge 1:1 year using `fig1b', assert(3) nogen
    merge 1:1 year using `estate', assert(1 3) nogen

    sort year

    * Use the KS series from fig_a18v2.csv where available and fall back to
    * the KS series from top01shares_unadjusted.csv otherwise.
    replace top01wlthsh_ks_matt = top01wlthsh_ks if missing(top01wlthsh_ks_matt)
    drop top01wlthsh_ks
    rename top01wlthsh_ks_matt top01wlthsh_ks

    #delimit ;
    twoway (connect top01wlthsh_szz year, ms(o) color("$f1") lw(medthick)) 
        (connect top01wlthsh_sz year, ms(sh) color("$f1") lp("-"))
        (connect top01sh_estatemult_szbpea year, ms(th) color("$f1") lp("."))
        (connect top01wlthsh_ks year, ms(d) color("$f1") lp(shortdash))      
        (connect top01wlthsh_pref year, ms(o) color("$u1") lw(thick))
        (connect top01wlthsh_equrtrns_v3 year, ms(s) color("$u3"))
        ,
        $gpr
        ytitle("Top 0.1% Wealth Share (%)") xtitle("") 
        xscale(range(1980 2016)) xlab(1980(5)2015)
        legend(order(1 "Preferred Estate Tax" 2 "SZ (2019) Estate Tax Replication"
                     3 "SZ (2019) Estate Tax Facsimile" 4 "KS (2004) Updated"
                     5 "Baseline Capitalized" 6 "Equal Returns Capitalized")
            row(2) region(lc(white) margin(tiny))) 
        xsize(8);
    #delimit cr
    graph export estate_top01shares.pdf, replace

end/*%>*/

capture program drop interestrate_pships/*%<*/
program define interestrate_pships
    /***************************************************************************
        Appendix Figure 22a

        Plots partnership interest rates and dividend yields against
        wadjgross. Both inputs are 1065 collapses keyed by wadjgross; only
        values of wadjgross present in both files are plotted. Takes no
        arguments; exports hockeystick_rates_yields.pdf.
    ***************************************************************************/

    * Interest rates
    insheet using "$inputs/int1065_collapse_Radjgross_20201117.csv", comma clear
    keep rate wadjgross

    tempfile int_rates
    save `int_rates'

    * Dividend yields, joined to the interest rates
    insheet using "$inputs/div1065_collapse_Radjgross_20201117.csv", comma clear
    keep div_yld wadjgross
    merge 1:1 wadjgross using `int_rates', keep(match) nogenerate

    * Multiply both series by 100 for the percentage axis
    foreach pctvar of varlist div_yld rate {
        replace `pctvar' = 100 * `pctvar'
    }

    twoway (connected rate wadjgross, color("$u1") msymbol(o) lpattern(dash))        ///
           (connected div_yld wadjgross, color("$u3") msymbol(s) lpattern(dash)),    ///
        ylabel(0(1)8)                                                                ///
        xtitle("AGI Percentile") ytitle("Interest Rate or Yield (%)")                ///
        legend(order(1 "Interest Rates for Fixed Income Partnerships"                ///
                     2 "Dividend Yields for Equity Partnerships") rows(2) region(lcolor(white))) ///
        $gpr
    graph export "hockeystick_rates_yields.pdf", replace

end/*%>*/

capture program drop scf_scaleprivbiz/*%<*/
program define scf_scaleprivbiz

    /*******************************************************************************
        scf_scaleprivbiz

        Rescales SCF private business wealth so that yearly SCF totals for
        S-corporations (scorw), private C-corporations (privccorw) and
        non-corporate business (schcpartw) equal the Financial Accounts totals,
        then
          (1) recomputes top wealth shares under three scaled net worth concepts
              (schcpartw, pthru, hwbus) and plots them against the series in
              topshares_adjusted.csv;
          (2) recomputes wealth composition by wealth group under each of the
              three specifications and plots it against wealthcompovertime.csv.

        Takes no arguments. Relies on load_analysis_data, es_rank_scf and
        gcollapse, none of which are defined in this program, and on the globals
        used below for paths, colours and graph options.
    *******************************************************************************/

    /*******************************************************************************
        Appendix Figure 16
        Load Financial Accounts concepts we want from the parameters file/*%<*/
    *******************************************************************************/

    load_analysis_data parameters_new

    keep year ttschcpartw ttscorw

    assert !missing(ttschcpartw) & !missing(ttscorw)

    replace ttschcpartw = ttschcpartw * 1E6
    replace ttscorw = ttscorw * 1E6

    rename tt* tt*_usfa

    /* Restrict to post-1996 b/c this is when Financial Accounts starts publishing
        disaggregated S-corporation asset amounts; see SZ 2020 note in 
        PSZ2020AppendixTablesI(Aggreg).xlsx, DataWealth sheet column EK: ``Before
        1996: assume 1996-2011 average return on positive profits.''*/ 
    * The variable was renamed just above, so refer to it by its full new name
    * rather than relying on variable abbreviation.
    replace ttscorw_usfa = . if year < 1996

    tempfile parameters
    save `parameters'/*%>*/

    /*******************************************************************************
        Load private C-corporations in the Financial Accounts/*%<*/
    *******************************************************************************/

    load_analysis_data usfa
    rename privccorw_usfa ttprivccorw_usfa
    assert ttprivccorw_usfa == 0 if year < 1996 // Don't have series before 1996
    drop if year < 1996

    replace ttprivccorw_usfa = ttprivccorw_usfa * 1E6

    tempfile privccorw
    save `privccorw'/*%>*/

    /*******************************************************************************
        Load SCF microfile and assemble version of networth with scaled private /*%<*/
            business. Then save.
    *******************************************************************************/

    use $inputs/scf_revision.dta, clear

    * Pension and car debt
    drop networth_pref 
    gen networth_pref = networth + funded_pen_db - vehic - durables
    replace hwpen = hwpen - tot_pen_db + funded_pen_db
    replace hwoth = hwoth - veh_inst

    * Non-corporate business is pass-through wealth less S-corporations. The two
    * check variables rebuild it from other components; the assert requires
    * agreement within 0.1 percent or within 0.01 in levels.
    gen schcpartw = pthru - scorw
    gen schcpartw_check1 = pthrubus - scorw + nnresre
    gen schcpartw_check2 = partw + solepropw + nnresre

    #delimit ;
    assert (inrange(schcpartw / schcpartw_check1, 0.999, 1.001) |
            abs(schcpartw - schcpartw_check1) < 0.01) & 
           (inrange(schcpartw / schcpartw_check2, 0.999, 1.001) | 
            abs(schcpartw - schcpartw_check2) < 0.01);
    #delimit cr

    tempfile orig
    save `orig'

    * Weighted SCF totals by year for the three concepts being scaled
    collapse (sum) ttscorw_scf = scorw ttprivccorw_scf = privccorw ///
        ttschcpartw_scf = schcpartw [fw = wgt1B], by(year)

    foreach billionX of varlist tt* {
        replace `billionX' = `billionX' / 1E9
    }

    tempfile scftotals
    save `scftotals'

    use `orig', clear

    merge m:1 year using `scftotals', assert(3) nogen
    merge m:1 year using `parameters', assert(2 3) keep(3) nogen

    * The private C-corporation series starts in 1996. Unmatched SCF rows must
    * be before 1996; unmatched Financial Accounts years must not be SCF survey
    * years (every third year from 1989) within 1996-2016.
    merge m:1 year using `privccorw'
    assert year < 1996 if _merge == 1
    assert mod(year - 1989, 3) > 0 | !inrange(year, 1996, 2016) if _merge == 2
    assert _merge == 3 if inrange(year, 1996, 2016) & mod(year - 1989, 3) == 0
    drop if _merge == 2
    drop _merge

    sort year
    foreach scfconcept of varlist scorw privccorw schcpartw {
        * Ratio of the Financial Accounts total to the SCF total, by year
        gen `scfconcept'_scalingfactor = tt`scfconcept'_usfa / tt`scfconcept'_scf
        
        if inlist("`scfconcept'", "scorw", "privccorw") {
            assert inrange(`scfconcept'_scalingfactor, 0, 1) if year > 1996
        }
        else {
            assert inrange(`scfconcept'_scalingfactor, 0, 1)   
        }

        gen `scfconcept'_scaled = `scfconcept' * `scfconcept'_scalingfactor
    }

    gen pthru_scaled = scorw_scaled + schcpartw_scaled
    gen ccorw_scaled = stocks + privccorw_scaled + stmutf + (0.5 * comutf) + ///
                        trusts_equity + (0.5 * omutf)
    gen hwbus_scaled = privccorw_scaled + pthru_scaled

    * Net worth with the unscaled component swapped for its scaled version
    gen networth_scaledschcpartw = networth_pref + schcpartw_scaled - schcpartw
    gen networth_scaledpthru = networth_pref + pthru_scaled - pthru
    gen networth_scaledhwbus = networth_pref + hwbus_scaled - hwbus

    tempfile scf_revision_scaled
    save `scf_revision_scaled'

    /*******************************************************************************
        Make version of figure 1 panel B with USFA-scaled private business/*%<*/
    *******************************************************************************/

        /***************************************************************************
            Calculate top 0.01, top 0.1, top 1, and top 10 wealth shares under /*%<*/
                different versions of net worth concept
        ***************************************************************************/

    foreach scaled in schcpartw pthru hwbus {

        use `scf_revision_scaled', clear
        *use $dumpdir/scf_revision_scaledbiz.dta, clear

        es_rank_scf, rankvar(networth_scaled`scaled')
        
        /* Group 1 = bottom 90%, group 2 = P90-99, group 3 = P99-99.9, 
            group 4 = P99.9-99.99, group 5 = top 0.01% */
        #delimit ;
        qui gen wlthgrp = cond(es_rank > 0.9999, 5,
                          cond(es_rank > 0.999, 4,
                          cond(es_rank > 0.99, 3, 
                          cond(es_rank > 0.9, 2, 1))));
        #delimit cr
        
        collapse (sum) networth_scaled`scaled' [fw = wgt1B], by(year wlthgrp)

        * Unscale from billion x weight
        qui replace networth_scaled`scaled' = networth_scaled`scaled' / 1E9
        
        qui reshape wide networth_scaled`scaled', i(year) j(wlthgrp)
        isid year
        
        /* Create mutually inclusive top 0.1% and top 1% concepts from mutually 
            exclusive groups */
        qui gen networth_scaled`scaled'_t01 = networth_scaled`scaled'4 + networth_scaled`scaled'5
        qui gen networth_scaled`scaled'_t1 = networth_scaled`scaled'3 + networth_scaled`scaled'_t01
        qui gen networth_scaled`scaled'_t10 = networth_scaled`scaled'2 + networth_scaled`scaled'_t1
        
        * Create totals and check that totals are consistent
        qui egen total_wealth = rowtotal(networth_scaled`scaled'?)
        qui gen total_wealth_check = networth_scaled`scaled'1 + networth_scaled`scaled'_t10
        
        if inlist("`scaled'", "pthru", "hwbus") { // USFA closely held corps 
            assert inrange(total_wealth / total_wealth_check, 0.999, 1.001) if year >= 1996
            drop if year < 1996
        }
        else {
            assert inrange(total_wealth / total_wealth_check, 0.999, 1.001)   
        }
        
        rename networth_scaled`scaled'5 networth_scaled`scaled'_t001
        
        * Add Forbes wealth
        qui merge 1:1 year using $dtadir/forbeswlth.dta, keepusing(forbeswlth) ///
            assert(2 3) keep(3) nogen
        
        foreach addforbes of varlist networth_scaled`scaled'_t* total_wealth {
            qui replace `addforbes' = `addforbes' + forbeswlth
        }
        
        foreach topgrp in t10 t1 t01 t001 {
            qui gen `topgrp'_scfscaled`scaled' = (networth_scaled`scaled'_`topgrp' / total_wealth) * 100
            assert inrange(`topgrp'_scfscaled`scaled', 0, 100)
        }

        tempfile scfscaled`scaled'
        qui save `scfscaled`scaled''
    }/*%>*/

        /***************************************************************************
             Merge files together/*%<*/
        ***************************************************************************/

    import delim using topshares_adjusted.csv, clear

    rename top* t*

    merge 1:1 year using `scfscaledschcpartw', assert(1 3) nogen
    merge 1:1 year using `scfscaledpthru', assert(1 3) nogen
    merge 1:1 year using `scfscaledhwbus', assert(1 3) nogen/*%>*/

        /***************************************************************************
            Make plots/*%<*/
        ***************************************************************************/

    sort year
    foreach topgrp in t001 t01 t1 {
       foreach scaled in schcpartw pthru hwbus {     
            
            local scaledname = cond("`scaled'" == "schcpartw", "Non-Corp Biz", ///
                               cond("`scaled'" == "pthru", "Pass-Through", "Pvt Biz"))

            #delimit ;
            graph twoway 
                /*(connect `topgrp'_pszes year, ms(s) lc("$u3") mc("$u3"))*/
                (connect `topgrp'_equrtrns_v3 year, ms(s) lc("$u3") mc("$u3"))
                (connect `topgrp'_pref year, ms(O) lc("$u1") mc("$u1") lw(thick))
                (connect `topgrp'_scfpref year, ms(T) lc("$p2") mc("$p2") lw(thin))
                (connect `topgrp'_scfscaled`scaled' year, ms(Th) lc("$p2") mc("$p2") lw(thin) lp(-))
                ,
                $gpr
                legend(order(/*1 "Equal Return, Equal Split (PSZ 2018)"
                             2 "Equal Return, Equal Split (PSZ 2018 ext.)"*/
                             1 "Equal Returns"
                             2 "Baseline"
                             3 "Harmonized SCF w/Forbes"
                             4 "Harmonized SCF w/USFA-Scaled `scaledname'")
                    col(1) region(lcolor(white) margin(tiny)))
                xlab(1960(20)2020) xtitle(" ")
                ytitle("Share of Total Household Wealth (%)") xsize(4.5);
            #delimit cr
            graph export `topgrp'shares_scfscaled`scaled'.pdf, replace
        }
    }/*%>*//*%>*/

    /*******************************************************************************
         Plot wealth composition over time
    *******************************************************************************/

        /***************************************************************************
             Prepare SCF wealth composition collapses in which business is /*%<*/
                scaled depending on specification
        ***************************************************************************/

    * whichspec numbers the specifications 1 to 3 in loop order; it becomes the
    * scf1, scf2, scf3 suffix used by the plots further down.
    local whichspec = 1

    foreach scaled in schcpartw pthru hwbus {

        use `scf_revision_scaled', clear

        gen pthru_orig = pthru
        gen ccorw_orig = ccorw

        if "`scaled'" == "hwbus" {
            assert !missing(ccorw_scaled) if year >= 1996
            
            drop pthru hwbus ccorw
            rename (pthru_scaled hwbus_scaled ccorw_scaled) (pthru hwbus ccorw)
        }
        else {
            qui replace hwbus = hwbus - `scaled' + `scaled'_scaled
            qui replace pthru = pthru - `scaled' + `scaled'_scaled  
        }
        
        es_rank_scf, rankvar(networth_scaled`scaled') ///
            othersplitvars("hwequ ccorw hwfix hwbus pthru hwpen hwhou hwoth othdebt")

        /* Wealth groups: 6 = below P90, 5 = P90-99; 4 = P99-99.9; 3 = P99.9-99.99, 
            2 = P99.99-99.999, 1 = top 0.001% (P99.999-100) */
        #delimit ;
        qui gen wlthgrp = cond(es_rank < 0.90, 6, 
                      cond(es_rank < 0.99, 5, 
                      cond(es_rank < 0.999, 4, 
                      cond(es_rank < 0.9999, 3, 
                      cond(es_rank < 0.99999, 2, 1)))));
        #delimit cr

        * Other debt is asserted to be zero or negative and is folded into hwoth
        assert sign(othdebt) == -1 | othdebt == 0
        qui replace hwoth = hwoth + othdebt

        #delimit ;
        gcollapse (sum) hweal_scf = networth_scaled`scaled' hwequ_scf = hwequ 
            ccorw_scf = ccorw hwfix_scf = hwfix hwbus_scf = hwbus pthru_scf = pthru 
            hwpen_scf = hwpen hwhou_scf = hwhou hwoth_scf = hwoth [fw = wgt1B], 
            by(year wlthgrp);
        #delimit cr

        foreach billionX of varlist *_scf {
            qui replace `billionX' = `billionX' / 1E9 // Unscale from 1B weights
        }

        qui gen pen_hou_oth_scf = hwpen_scf + hwhou_scf + hwoth_scf

        * Two decompositions of total wealth must each reproduce the scaled
        * net worth total within 0.1 percent. For the pthru and hwbus
        * specifications the check only applies from 1996 on.
        qui gen hweal_check1 = hwequ_scf + hwfix_scf + hwbus_scf + pen_hou_oth_scf
        qui gen hweal_check2 = ccorw_scf + hwfix_scf + pthru_scf + pen_hou_oth_scf
        if "`scaled'" == "schcpartw" {
            assert inrange(hweal_check1 / hweal_scf, 0.999, 1.001) & ///
                   inrange(hweal_check2 / hweal_scf, 0.999, 1.001)
        }  
        else {
            assert inrange(hweal_check1 / hweal_scf, 0.999, 1.001) & ///
                   inrange(hweal_check2 / hweal_scf, 0.999, 1.001) if year >= 1996
        }
        drop hweal_check?

        qui reshape wide *_scf, i(year) j(wlthgrp)

        foreach wlthcmpt in hweal_scf hwequ_scf ccorw_scf hwfix_scf hwbus_scf ///
            pthru_scf hwpen_scf hwhou_scf hwoth_scf pen_hou_oth_scf {

            qui egen `wlthcmpt'_total = rowtotal(`wlthcmpt'?)

            * Build cumulative top groups from the mutually exclusive ones
            qui gen `wlthcmpt'_t001 = `wlthcmpt'1 + `wlthcmpt'2
            qui gen `wlthcmpt'_t01 = `wlthcmpt'_t001 + `wlthcmpt'3
            qui gen `wlthcmpt'_t1 = `wlthcmpt'_t01 + `wlthcmpt'4
            
            drop `wlthcmpt'2 `wlthcmpt'3 `wlthcmpt'4
            
            rename (`wlthcmpt'6 `wlthcmpt'5 `wlthcmpt'1) ///
                (`wlthcmpt'_b90 `wlthcmpt'_p9099 `wlthcmpt'_t0001)
        }

        * sh is the component share of the group's own wealth; tsh is the
        * component held by the group as a share of total wealth.
        foreach wlthcmpt in hwequ ccorw hwfix hwbus pthru pen_hou_oth hwpen hwhou hwoth {
            foreach grp in b90 p9099 t1 t01 t001 t0001 {
                qui gen `wlthcmpt'_sh_scf_`grp' = (`wlthcmpt'_scf_`grp' / hweal_scf_`grp') * 100
                qui gen `wlthcmpt'_tsh_scf_`grp' = (`wlthcmpt'_scf_`grp' / hweal_scf_total) * 100
                
                qui replace `wlthcmpt'_scf_`grp' = `wlthcmpt'_scf_`grp' / 1E12
            }    
        }

        rename *_scf_* *_scf`whichspec'_* 

        tempfile scfscaled`scaled'
        qui save `scfscaled`scaled''

        local ++whichspec
    }/*%>*/

        /***************************************************************************
            Merge files together/*%<*/
        ***************************************************************************/

    import delimited using wealthcompovertime.csv, clear

    rename (*top* *bot*) (*t* *b*)

    merge 1:1 year using `scfscaledschcpartw', assert(1 3) nogen
    merge 1:1 year using `scfscaledpthru', assert(1 3) nogen
    merge 1:1 year using `scfscaledhwbus', assert(1 3) nogen


    sort year
    foreach sharevar in hwfix hwequ ccorw hwbus pthru hwpen hwhou hwoth pen_hou_oth {
        
        #delimit ;
        /* cmptname is a readable label for the component; the plots below do
           not currently use it */
        local cmptname = cond("`sharevar'" == "hwfix", "Fixed Income", 
                         cond("`sharevar'" == "hwequ", "Public Equity", 
                         cond("`sharevar'" == "ccorw", "C-corporation Equity", 
                         cond("`sharevar'" == "hwbus", "Private Business", 
                         cond("`sharevar'" == "pthru", "Pass-through Business", 
                         cond("`sharevar'" == "hwhou", "Housing", 
                         cond("`sharevar'" == "hwpen", "Pensions",
                         cond("`sharevar'" == "hwoth", "Other", "Pensions, Housing, and Other"))))))));
        
            /* Quoted so that the macro holds the text t1; without quotes the
               right-hand side is evaluated as an expression, that is, as a
               reference to a variable named t1 */
            local dfagrp = "t1";
            
            twoway (connect `sharevar'_sh_preferred_`dfagrp' year, ms(o) lc("$u1") mc("$u1") lw(medthick))
                (connect `sharevar'_sh_equ_`dfagrp' year, ms(s) lc("$u3") mc("$u3") lp(-))
                (connect `sharevar'_sh_scf_`dfagrp' year, ms(t) lc("$p2") mc("$p2") lw(thin))
                (connect `sharevar'_sh_scf1_`dfagrp' year, ms(th) lc("$f1") mc("$f1") lw(thin) lp(-))
                (connect `sharevar'_sh_scf2_`dfagrp' year, ms(th) lc("$p2") mc("$p2") lw(thin) lp("_."))
                (connect `sharevar'_sh_scf3_`dfagrp' year, ms(th) lc("$f3") mc("$f3") lw(thin) lp("."))
                (connect `sharevar'_sh_dfa_`dfagrp' year, ms(d) lc("$u4") mc("$u4") lp("-."))
                ,
                $gpr 
                xlab(1960(20)2020)
                yscale(range(0 70)) ylab(0(10)70)
                ytitle("Share of Wealth (%)") xtitle("")
                legend(order(2 "Equal Returns" 1 "Baseline" 7 "DFA" 3 "SCF"
                             4 "SCF, Scale Schc+Partw" 5 "SCF, Scale Pass-Through" 
                             6 "SCF, Scale Pvt Biz") 
                        region(lcolor(white) margin(tiny)) col(2))

                xsize(6.45);
            graph export `sharevar'_shares_`dfagrp'_bizscale.pdf, replace;
            #delimit cr
        
        
        foreach grp in t01 t001 {
            * Group label keyed on the values this loop actually takes; cond
            * needs a fallback as its third argument. The plot below does not
            * currently use the label.
            local grplab = cond("`grp'" == "t01", "Top 0.1%", "Top 0.01%")

            * t0001 is not in the loop list above, so these currently always
            * resolve to 1966 and 1960.
            local startyr = cond("`grp'" == "t0001", 1989, 1966)
            local labstart = cond("`grp'" == "t0001", 1990, 1960)

            #delimit ;
            twoway (connect `sharevar'_sh_preferred_`grp' year, ms(o) lc("$u1") mc("$u1") lwidth(medthick))
                (connect `sharevar'_sh_equ_`grp' year, ms(s) lc("$u3") mc("$u3") lpattern(-))
                (connect `sharevar'_sh_scf_`grp' year, ms(t) lc("$p2") mc("$p2") lwidth(thin))
                (connect `sharevar'_sh_scf1_`grp' year, ms(th) lc("$f1") mc("$f1") lw(thin) lp(-))
                (connect `sharevar'_sh_scf2_`grp' year, ms(th) lc("$p2") mc("$p2") lw(thin) lp("_."))
                (connect `sharevar'_sh_scf3_`grp' year, ms(th) lc("$f3") mc("$f3") lw(thin) lp("."))
                if inrange(year, `startyr', 2020)
                ,
                $gpr 
                xlab(`labstart'(20)2020)
                yscale(range(0 70)) ylab(0(10)70)
                ytitle("Share of Wealth (%)") xtitle("")
                legend(order(2 "Equal Returns" 1 "Baseline" 3 "SCF" 
                             4 "SCF, Scale Schc+Partw" 
                             5 "SCF, Scale Pass-Through" 
                             6 "SCF, Scale Pvt Biz") 
                        region(lcolor(white) margin(tiny)) col(2))

                xsize(6.45);
            graph export `sharevar'_shares_`grp'_bizscale.pdf, replace;
            #delimit cr
        }
    }/*%>*/

end/*%>*/

capture program drop scf_scaletaxbond/*%<*/
program define scf_scaletaxbond

    /*******************************************************************************
        Appendix Figures 15, 17 and 18
		 Load Financial Accounts concepts we want from the parameters file
    *******************************************************************************/

    load_analysis_data parameters_new

    gen tthwfix = ttinttaxw + ttmmbondfund + ttcurrency + ttintexmw

    keep year ttinttaxw tthwfix 
    assert !missing(tthwfix) & !missing(ttinttaxw)

    replace ttinttaxw = ttinttaxw * 1E6
    replace tthwfix = tthwfix * 1E6

    rename tt* tt*_usfa

    tempfile parameters
    save `parameters'

    /*******************************************************************************
        Load SCF microfile and assemble versions of networth with scaled inttaxw 
            and scaled fixed income (hwfix). Then save.
    *******************************************************************************/

    use $inputs/scf_revision.dta, clear

    * Pension and car debt
    drop networth_pref 
    gen networth_pref = networth + funded_pen_db - vehic - durables
    replace hwpen = hwpen - tot_pen_db + funded_pen_db
    replace hwoth = hwoth - veh_inst

    tempfile orig
    save `orig'

    collapse (sum) ttinttaxw_scf = inttaxw tthwfix_scf = hwfix [fw = wgt1B], by(year)

    foreach billionX of varlist tt* {
        replace `billionX' = `billionX' / 1E9
    }

    tempfile scftotals
    save `scftotals'

    use `orig', clear

    merge m:1 year using `scftotals', assert(3) nogen
    merge m:1 year using `parameters', assert(2 3) keep(3) nogen

    sort year
    foreach scfconcept of varlist hwfix inttaxw {
        gen `scfconcept'_scalingfactor = tt`scfconcept'_usfa / tt`scfconcept'_scf
        
        assert !missing(`scfconcept'_scalingfactor)

        gen `scfconcept'_scaled = `scfconcept' * `scfconcept'_scalingfactor
    }

    gen networth_scaledinttaxw = networth_pref + inttaxw_scaled - inttaxw
    gen networth_scaledhwfix = networth_pref + hwfix_scaled - hwfix

    tempfile scf_revision_scaled
    save `scf_revision_scaled'

    /*******************************************************************************
         Make version of figure 1 panel B with USFA-scaled private business
    *******************************************************************************/

        /***************************************************************************
            Calculate top 0.01, top 0.1, top 1, and top 10 wealth shares under 
                different versions of net worth concept
        ***************************************************************************/

    foreach scaled in inttaxw hwfix {

        use `scf_revision_scaled', clear
        *use $dumpdir/scf_revision_scaledbiz.dta, clear

        es_rank_scf, rankvar(networth_scaled`scaled')
        
        /* Group 1 = bottom 90%, group 2 = P90-99, group 3 = P99-99.9, 
            group 4 = P99.9-99.99, group 5 = top 0.01% */
        #delimit ;
        qui gen wlthgrp = cond(es_rank > 0.9999, 5,
                          cond(es_rank > 0.999, 4,
                          cond(es_rank > 0.99, 3, 
                          cond(es_rank > 0.9, 2, 1))));
        #delimit cr
        
        collapse (sum) networth_scaled`scaled' [fw = wgt1B], by(year wlthgrp)

        * Unscale from billion x weight
        qui replace networth_scaled`scaled' = networth_scaled`scaled' / 1E9
        
        qui reshape wide networth_scaled`scaled', i(year) j(wlthgrp)
        isid year
        
        /* Create mutually inclusive top 0.1% and top 1% concepts from mutually 
            exclusive groups */
        qui gen networth_scaled`scaled'_t01 = networth_scaled`scaled'4 + networth_scaled`scaled'5
        qui gen networth_scaled`scaled'_t1 = networth_scaled`scaled'3 + networth_scaled`scaled'_t01
        qui gen networth_scaled`scaled'_t10 = networth_scaled`scaled'2 + networth_scaled`scaled'_t1
        
        * Create totals and check that totals are consistent
        qui egen total_wealth = rowtotal(networth_scaled`scaled'?)
        qui gen total_wealth_check = networth_scaled`scaled'1 + networth_scaled`scaled'_t10
        
        if inlist("`scaled'", "pthru", "hwbus") { // USFA closely held corps 
            assert inrange(total_wealth / total_wealth_check, 0.999, 1.001) if year >= 1996
            drop if year < 1996
        }
        else {
            assert inrange(total_wealth / total_wealth_check, 0.999, 1.001)   
        }
        
        rename networth_scaled`scaled'5 networth_scaled`scaled'_t001
        
        * Add Forbes wealth
        qui merge 1:1 year using $inputs/forbeswlth.dta, keepusing(forbeswlth) ///
            assert(2 3) keep(3) nogen
        
        foreach addforbes of varlist networth_scaled`scaled'_t* total_wealth {
            qui replace `addforbes' = `addforbes' + forbeswlth
        }
        
        foreach topgrp in t1 t01 t001 {
            qui gen `topgrp'_scfscaled`scaled' = (networth_scaled`scaled'_`topgrp' / total_wealth) * 100
            assert inrange(`topgrp'_scfscaled`scaled', 0, 100)
        }

        tempfile scfscaled`scaled'
        qui save `scfscaled`scaled''
    }

        /***************************************************************************
            Merge files together
        ***************************************************************************/

    import delim using topshares_adjusted.csv, clear

    rename top* t*

    merge 1:1 year using `scfscaledhwfix', assert(1 3) nogen
    merge 1:1 year using `scfscaledinttaxw', assert(1 3) nogen

        /***************************************************************************
            Make plots
        ***************************************************************************/

    sort year
    foreach topgrp in t001 t01 t1 {
       foreach scaled in hwfix inttaxw {     
            
            local scaledname = cond("`scaled'" == "hwfix", "Risk-Based Fix", "Flows-Based Fix")

            #delimit ;
            graph twoway 
                (connect `topgrp'_equrtrns_v3 year, ms(s) lc("$u3") mc("$u3"))
                (connect `topgrp'_pref year, ms(O) lc("$u1") mc("$u1") lw(thick))
                (connect `topgrp'_scfpref year, ms(T) lc("$p2") mc("$p2") lw(thin))
                (connect `topgrp'_scfscaled`scaled' year, ms(Th) lc("$p2") mc("$p2") lw(thin) lp(-))
                ,
                $gpr
                legend(order(1 "Equal Returns"
                             2 "Baseline"
                             3 "Harmonized SCF w/Forbes"
                             4 "Harmonized SCF w/USFA-Scaled `scaledname'")
                    col(1) region(lcolor(white) margin(tiny)))
                xlab(1960(20)2020) xtitle(" ")
                ytitle("Share of Total Household Wealth (%)") xsize(4.5);
            #delimit cr
            graph export `topgrp'shares_scfscaled`scaled'.pdf, replace
        }
    }

    /*******************************************************************************
        Plot wealth composition over time
    *******************************************************************************/

        /***************************************************************************
            Prepare SCF wealth composition collapses in which business is 
                scaled depending on specification
        ***************************************************************************/

    local whichspec = 1

    foreach scaled in hwfix inttaxw {

        use `scf_revision_scaled', clear
        *use $dumpdir/scf_revision_scaledbiz.dta, clear

        replace hwfix = hwfix - `scaled' + `scaled'_scaled

        es_rank_scf, rankvar(networth_scaled`scaled') ///
            othersplitvars("hwequ ccorw hwfix hwbus pthru hwpen hwhou hwoth othdebt")

        /* Wealth groups: 6 = below P90, 5 = P90-99; 4 = P99-99.9; 3 = P99.9-99.99, 
            2 = P99.99-99.999, 1 = top 0.001% (P99.999-100) */
        #delimit ;
        qui gen wlthgrp = cond(es_rank < 0.90, 6, 
                      cond(es_rank < 0.99, 5, 
                      cond(es_rank < 0.999, 4, 
                      cond(es_rank < 0.9999, 3, 
                      cond(es_rank < 0.99999, 2, 1)))));
        #delimit cr

        assert sign(othdebt) == -1 | othdebt == 0
        qui replace hwoth = hwoth + othdebt

        #delimit ;
        gcollapse (sum) hweal_scf = networth_scaled`scaled' hwequ_scf = hwequ 
            ccorw_scf = ccorw hwfix_scf = hwfix hwbus_scf = hwbus pthru_scf = pthru 
            hwpen_scf = hwpen hwhou_scf = hwhou hwoth_scf = hwoth [fw = wgt1B], 
            by(year wlthgrp);
        #delimit cr

        foreach billionX of varlist *_scf {
            qui replace `billionX' = `billionX' / 1E9 // Unscale from 1B weights
        }

        qui gen pen_hou_oth_scf = hwpen_scf + hwhou_scf + hwoth_scf

        qui gen hweal_check1 = hwequ_scf + hwfix_scf + hwbus_scf + pen_hou_oth_scf
        qui gen hweal_check2 = ccorw_scf + hwfix_scf + pthru_scf + pen_hou_oth_scf
        if "`scaled'" == "schcpartw" {
            assert inrange(hweal_check1 / hweal_scf, 0.999, 1.001) & ///
                   inrange(hweal_check2 / hweal_scf, 0.999, 1.001)
        }  
        else {
            assert inrange(hweal_check1 / hweal_scf, 0.999, 1.001) & ///
                   inrange(hweal_check2 / hweal_scf, 0.999, 1.001) if year >= 1996
        }
        drop hweal_check?

        qui reshape wide *_scf, i(year) j(wlthgrp)

        foreach wlthcmpt in hweal_scf hwequ_scf ccorw_scf hwfix_scf hwbus_scf ///
            pthru_scf hwpen_scf hwhou_scf hwoth_scf pen_hou_oth_scf {

            qui egen `wlthcmpt'_total = rowtotal(`wlthcmpt'?)

            qui gen `wlthcmpt'_t001 = `wlthcmpt'1 + `wlthcmpt'2
            qui gen `wlthcmpt'_t01 = `wlthcmpt'_t001 + `wlthcmpt'3
            qui gen `wlthcmpt'_t1 = `wlthcmpt'_t01 + `wlthcmpt'4
            
            drop `wlthcmpt'2 `wlthcmpt'3 `wlthcmpt'4
            
            rename (`wlthcmpt'6 `wlthcmpt'5 `wlthcmpt'1) ///
                (`wlthcmpt'_b90 `wlthcmpt'_p9099 `wlthcmpt'_t0001)
        }

        foreach wlthcmpt in hwequ ccorw hwfix hwbus pthru pen_hou_oth hwpen hwhou hwoth {
            foreach grp in b90 p9099 t1 t01 t001 t0001 {
                qui gen `wlthcmpt'_sh_scf_`grp' = (`wlthcmpt'_scf_`grp' / hweal_scf_`grp') * 100
                qui gen `wlthcmpt'_tsh_scf_`grp' = (`wlthcmpt'_scf_`grp' / hweal_scf_total) * 100
                
                qui replace `wlthcmpt'_scf_`grp' = `wlthcmpt'_scf_`grp' / 1E12
            }    
        }

        rename *_scf_* *_scf`whichspec'_* 

        tempfile scfscaled`scaled'
        qui save `scfscaled`scaled''

        local ++whichspec
    }

        /***************************************************************************
            Merge files together
        ***************************************************************************/

    import delimited using wealthcompovertime.csv, clear

    rename (*top* *bot*) (*t* *b*)

    merge 1:1 year using `scfscaledhwfix', assert(1 3) nogen
    merge 1:1 year using `scfscaledinttaxw', assert(1 3) nogen

    sort year
    foreach sharevar in hwfix hwequ ccorw hwbus pthru hwpen hwhou hwoth pen_hou_oth {
        
        #delimit ;
        local cmptname = cond("`sharevar'" == "hwfix", "Fixed Income", 
                         cond("`sharevar'" == "hwequ", "Public Equity", 
                         cond("`sharevar'" == "ccorw", "C-corporation Equity", 
                         cond("`sharevar'" == "hwbus", "Private Business", 
                         cond("`sharevar'" == "pthru", "Pass-through Business", 
                         cond("`sharevar'" == "hwhou", "Housing", 
                         cond("`sharevar'" == "hwpen", "Pensions",
                         cond("`sharevar'" == "hwoth", "Other", "Pensions, Housing, and Other"))))))));
        #delimit cr
        
        local dfagrp=t1

            #delimit ;
            twoway (connect `sharevar'_sh_preferred_`dfagrp' year, ms(o) lc("$u1") mc("$u1") lw(medthick))
                (connect `sharevar'_sh_equ_`dfagrp' year, ms(s) lc("$u3") mc("$u3") lp(-))
                (connect `sharevar'_sh_scf_`dfagrp' year, ms(t) lc("$p2") mc("$p2") lw(thin))
                (connect `sharevar'_sh_scf1_`dfagrp' year, ms(th) lc("$f1") mc("$f1") lw(thin) lp(-))
                (connect `sharevar'_sh_scf2_`dfagrp' year, ms(th) lc("$p2") mc("$p2") lw(thin) lp("_."))
                (connect `sharevar'_sh_dfa_`dfagrp' year, ms(d) lc("$u4") mc("$u4") lp("-."))
                ,
                $gpr 
                xlab(1960(20)2020)
                yscale(range(0 70)) ylab(0(10)70)
                ytitle("Share of Wealth (%)") xtitle("")
                legend(order(2 "Equal Returns" 1 "Baseline" 6 "DFA" 3 "SCF"
                             4 "SCF, Risk-Based Fix" 5 "SCF, Flows-Based Fix") 
                        region(lcolor(white) margin(tiny)) col(2))

                xsize(6.45);
            graph export `sharevar'_shares_`dfagrp'_taxbondscaled.pdf, replace;
            #delimit cr
        
        
        foreach grp in t01 t001 {

            local startyr = cond("`grp'" == "t0001", 1989, 1966)
            local labstart = cond("`grp'" == "t0001", 1990, 1960)

            #delimit ;
            twoway (connect `sharevar'_sh_preferred_`grp' year, ms(o) lc("$u1") mc("$u1") lwidth(medthick))
                (connect `sharevar'_sh_equ_`grp' year, ms(s) lc("$u3") mc("$u3") lpattern(-))
                (connect `sharevar'_sh_scf_`grp' year, ms(t) lc("$p2") mc("$p2") lwidth(thin))
                (connect `sharevar'_sh_scf1_`grp' year, ms(th) lc("$f1") mc("$f1") lw(thin) lp(-))
                (connect `sharevar'_sh_scf2_`grp' year, ms(th) lc("$p2") mc("$p2") lw(thin) lp("_."))
                if inrange(year, `startyr', 2020)
                ,
                $gpr 
                xlab(`labstart'(20)2020)
                yscale(range(0 70)) ylab(0(10)70)
                ytitle("Share of Wealth (%)") xtitle("")
                legend(order(2 "Equal Returns" 1 "Baseline" 3 "SCF"
                             4 "SCF, Risk-Based Fix" 5 "SCF, Flows-Based Fix") 
                        region(lcolor(white) margin(tiny)) col(2))

                xsize(6.45);
            graph export `sharevar'_shares_`grp'_taxbondscaled.pdf, replace;
            #delimit cr
        }
    }

end/*%>*/

capture program drop graph_ultrarich_top01/*%<*/
program define graph_ultrarich_top01

    /***************************************************************************
        Appendix Figure 7
        Capitalized series

        Compares 2016 wealth of the top capitalized groups (on both the szz 
        and szz_tu bases) with Forbes wealth. Takes no arguments. Writes two 
        CSVs (counts and thresholds) and two bar charts 
        (ultrarich_vs_forbes_top01.pdf, ultrarich_vs_forbes.pdf) to the 
        working directory. Uses load_analysis_data and the globals 
        $preferred_defn_late, $gpr, $u1, $u3 and $p2.
    ***************************************************************************/
    
    /***********************************************************************
        Top 0.1% and top 0.01% (groups we can get from main wealth
            collapses)
    ***********************************************************************/

    * Tax-unit version: same groups, variables suffixed _tu
    load_analysis_data szz_tu
    keep if year == 2016

    rename (hweal${preferred_defn_late} n) (hweal N)

    keep if inlist(w${preferred_defn_late}_group, 9, 10, 5, 11, 6)
    assert inlist(group, "P99-99.9", "P99.9-99.99", "P99.99-100", ///
                        "P99.99-99.999", "P99.999-100")
    assert _N == 5

    keep hweal group N threshold

    rename * *_tu
    rename group_tu group

    tempfile maincapitalized_tu
    save `maincapitalized_tu'

    * Baseline (szz) version, with the _tu columns merged on by group
    load_analysis_data szz 
    keep if year == 2016

    rename (hweal${preferred_defn_late} n) (hweal N)
    
    keep if inlist(w${preferred_defn_late}_group, 9, 10, 5, 11, 6)
    assert inlist(group, "P99-99.9", "P99.9-99.99", "P99.99-100", ///
                        "P99.99-99.999", "P99.999-100")
    assert _N == 5

    keep hweal group N threshold

    tempfile maincapitalized 
    merge 1:1 group using `maincapitalized_tu', keep(1 3) nogen
    save `maincapitalized'

    /***************************************************************************
         Forbes wealth
    ***************************************************************************/

    load_analysis_data forbes

    keep if year == 2016
    assert _N == 1
    drop year

    rename (forbeswlth forbes_threshold) (hweal threshold)

    gen group = "Forbes"
    gen N = 400 * 2 // Assume all Forbes are married

    tempfile forbes400
    save `forbes400'

    /***************************************************************************
         Append together
    ***************************************************************************/

    use `forbes400', clear

    * The original also listed an undefined local (`inside_ultrarich'), which 
    * expanded to nothing; only the capitalized groups are appended.
    append using `maincapitalized'

    * Wealth in trillions
    replace hweal = hweal / 1E12
    replace hweal_tu = hweal_tu / 1E12
    * NOTE(review): N_tu is divided by 1E3 but N is not -- confirm intended units
    replace N_tu = N_tu / 1E3

    format hweal hweal_tu N N_tu %9.1fc

    replace group = "Top 0.01%" if group == "P99.99-100"
    replace group = "Top 0.001%" if group == "P99.999-100"

    /***************************************************************************
         Output results, finagle to make graph
    ***************************************************************************/

    outsheet group N N_tu using ultrarich_vs_forbes_counts.csv, comma replace
    outsheet group threshold threshold_tu using ultrarich_vs_forbes_thresholds.csv, comma replace

    * Separate variables so Forbes and capitalized bars get their own colors
    gen hweal_forbes = hweal if group == "Forbes"
    gen hweal_captlzd = hweal if group != "Forbes"
    gen hweal_captlzd_tu = hweal_tu if group != "Forbes"

    /* encode numbers the groups in sorted string order. Assuming all five 
        capitalized groups are present, that gives 1 = Forbes, 2 = P99-99.9, 
        3 = P99.9-99.99, 4 = P99.99-99.999, 5 = Top 0.001%, 6 = Top 0.01% 
        -- TODO confirm against the value label if group names change */
    encode group, gen(group_num)

    #delimit ;
    graph bar hweal_captlzd hweal_forbes hweal_captlzd_tu
        if inlist(group_num, 1, 2, 3, 6)
        , 
        bargap(-40)
        over(group_num) /*stack*/
        $gpr
        blabel(bar, position(center) color(white) format(%9.1fc) size(small))
        bar(2, color("$u3"))
        bar(1, color("$u1"))
        bar(3, color("$p2"))
        ytitle("Wealth (trillions)")
        legend(order(1 "Baseline, Individual" 3 "Baseline, Tax Unit") region(lc(white)))
        xsize(6.5);
    #delimit cr
    graph export ultrarich_vs_forbes_top01.pdf, replace

    * More groups
    #delimit ;
    graph bar hweal_captlzd hweal_forbes hweal_captlzd_tu
        if inlist(group_num, 1, 2, 3, 4, 5)
        , 
        bargap(-40)
        over(group_num) /*stack*/
        $gpr
        blabel(bar, position(center) color(white) format(%9.1fc) size(small))
        bar(2, color("$u3"))
        bar(1, color("$u1"))
        bar(3, color("$p2"))
        ytitle("Wealth (trillions)")
        legend(order(1 "Baseline, Individual" 3 "Baseline, Tax Unit") region(lc(white)))
        xsize(6.5);
    #delimit cr
    graph export ultrarich_vs_forbes.pdf, replace

end/*%>*/

****************************************************************************
* Equity
****************************************************************************
capture program drop graph_soca_portfolio_equity/*%<*/
program define graph_soca_portfolio_equity

    /***************************************************************************
        Appendix Figure 25a
        Bar chart of the composition of realized capital gains in the soca 
        data (years after 1985), as shares of net_tot. Takes no arguments; 
        writes share_bar_soca.pdf to the working directory. Uses 
        load_analysis_data and the globals $gpr, $u1-$u4, $f1 and $f4.
    ***************************************************************************/
    load_analysis_data soca
    keep if year > 1985

    /* NOTE(review): period is built and labelled here, but the collapse below 
        has no by() option, so it is discarded and the figure pools all years 
        after 1985. Confirm whether by(period) / over(period) was intended. */
    gen period = cond(inrange(year,1996,1999), 1, ///
                    cond(inrange(year,2003,2007), 2, ///
                        cond(inrange(year,2010,2012), 3, .)))

    label define periodlbl 1 "1996-99" 2 "2003-07" 3 "2010-12"
    label values period periodlbl

    * Pool everything into a single observation of totals
    collapse (sum) hard_assets stocks fin_assets net_pship_scorp_estate ///
        net_pthrough net_tot

    gen corp_mut_share = 100 * (stocks) / net_tot
    gen hard_share = 100 * (hard_assets) / net_tot 
    gen passthru_share = 100 * net_pthrough / net_tot

    * "Other" is the residual. Variables are listed explicitly rather than as 
    * a creation-order range, and the full name is used rather than an 
    * abbreviation, so this still works with -set varabbrev off-.
    egen non_other_share = rowtotal(corp_mut_share hard_share passthru_share)
    gen other_share = 100 - non_other_share
    drop non_other_share

    label var corp_mut_share "Stocks & Mutual Funds"
    label var hard_share "Real Estate & Tangible Assets"
    label var passthru_share "Pass-through Gains"
    label var other_share "Other & Unidentified"

    #delimit;
    graph bar (asis) corp_mut_share passthru_share hard_share other_share,
        bargap(20) 
        bar(1, color("$u1")) bar(2, color("$u3")) 
        bar(3, fcolor("$u2") lcolor("$u4") lw(medthick)) 
        bar(4, color("$f1") lcolor("$f4") lw(medthick))
        ytitle("Share of Total Realized Capital Gains (%)")
        legend(region(lcolor(white))) $gpr;
    graph export "share_bar_soca.pdf", replace;
    #delimit cr

end/*%>*/

capture program drop graph_optimal_alpha/*%<*/
program define graph_optimal_alpha

    /***************************************************************************
        Appendix Figure 25b
        Error-minimizing weight on dividends in SCF capitalization

        For each of five wealth groups, fits by nonlinear least squares the 
        weight alpha for which the group's share of 
        alpha * dividends + (1 - alpha) * capital gains best matches its 
        share of ccorw_mutf, then plots alpha with +/- 1.96 s.e. bands. 
        Takes no arguments; writes error_minimize_alpha_ccorw_mutf.pdf.
    ***************************************************************************/
    use $inputs/scf_revision.dta, clear

    * Pension and vehicle adjustments
    drop networth_pref
    gen networth_pref = networth + funded_pen_db - vehic - durables
    replace hwpen = hwpen - tot_pen_db + funded_pen_db
    replace hwoth = hwoth - veh_inst

    es_rank_scf, rankvar(networth_pref) othersplitvars("kginc divinc hwequ ccorw_mutf")

    keep year wgt wgt1B es_rank networth_pref kginc divinc hwequ ccorw_mutf

    /* Group 1 = Bottom 90%, Group 2 = P90-99, Group 3 = P99-99.9, 
        Group 4 = P99.9-99.99, Group 5 = Top 0.01%.
        Each threshold crossed moves an observation up one group. */
    gen group = 1 + (es_rank >= 0.9) + (es_rank >= 0.99) ///
                  + (es_rank >= 0.999) + (es_rank >= 0.9999)

    * To match PSZ/tax data kginc definition, bottom code losses at -3000
    replace kginc = -3000 if kginc < -3000

    collapse (sum) kginc divinc ccorw_mutf hwequ networth_pref [fw = wgt1B], by(year group)
    
    /* For every aggregate: unscale from billion x weight, then compute the 
        yearly total (tt*) and the group's share of that total (sh_*) */
    sort year
    foreach aggvar of varlist kginc divinc ccorw_mutf hwequ networth_pref {
        replace `aggvar' = `aggvar' / 1E9
        by year: egen tt`aggvar' = total(`aggvar')
        gen sh_`aggvar' = `aggvar' / tt`aggvar'
    }

    * The public-equity share is not used below
    drop sh_hwequ

    * Short names for the regression: s = stock share, yD = dividends, 
    * yG = capital gains, tty* = yearly totals
    rename sh_ccorw_mutf s
    rename divinc yD
    rename kginc yG
    rename ttdivinc ttyD
    rename ttkginc ttyG

    keep s sh* y* ttyD ttyG year group

    reshape wide sh_kginc sh_divinc sh_networth s yD yG, i(year) j(group)

    * One NLS fit per wealth group, starting from alpha = 0.5
    local a0 = 0.5 
    forvalues g = 1/5 {

        nl (s`g' = ({alpha=`a0'} * yD`g' + (1-{alpha=`a0'}) * yG`g') / ({alpha=`a0'} * ttyD + (1-{alpha=`a0'}) * ttyG))

        * Store the point estimate and its standard error as constants
        matrix A = e(b) 
        matrix V = e(V)
        gen alpha`g' = A[1,1]
        gen se_alpha`g' = V[1,1]^.5
    }

    collapse (mean) alpha? se_alpha?

    * Turn the single row of estimates into one row per wealth group
    xpose, clear varname

    gen bin = real(substr(_varname, -1, 1))
    assert !missing(bin) 

    replace _varname = substr(_varname, 1, strlen(_varname) - 1)

    reshape wide v1, i(bin) j(_varname, string)
    rename v1* *

    * 95% confidence bounds
    gen alpha_lb = alpha - 1.96 * se_alpha
    gen alpha_ub = alpha + 1.96 * se_alpha

    label define binlabel 1 "P0-90" 2 "P90-99" 3 "P99-99.9" 4 "P99.9-99.99" 5 "P99.99-100"
    label values bin binlabel

    #delimit ;
    twoway (rcap alpha_ub alpha_lb bin, lc(navy*0.4) lp("-")) 
            (scatter alpha bin, ms(o) lc("$u1") mc("$u1")), 
            legend(off) xti("Wealth Group") yti("Error-minimizing weight on dividends") 
            graphregion(color(white)) bgcolor(white) 
            yline(0.5, lc("$u3") lp("_")) /* SZ 2016 */
            yline(0.9, lc(black) lp("_")) /* SZZ 2020 */
            xlabel(, labels valuelabel) xscale(range(0.75 5.25))
            ylab(0(.25)1.5, nogrid format(%9.2f));
    #delimit cr
    graph export "error_minimize_alpha_ccorw_mutf.pdf", replace

end/*%>*/

capture program drop graph_forbes_adjustment/*%<*/
program define graph_forbes_adjustment

    /***************************************************************************
        Appendix Figure 8
        Differences in stock wealth or wealth shares after BHH-V
            adjustment

        Takes no arguments. Writes forbes_sensitivity_bars.pdf (2016 bars by 
        top group) and forbes_sensitivity_1..4.pdf (one time-series plot per 
        top group) to the working directory.
    ***************************************************************************/

    * Stash the Forbes-adjusted shares for the merge below
    load_analysis_data szz_forbes
    tempfile forbes
    save `forbes'

    /***********************************************************************
         Pull PSZ 2018 top 01 share/equal split extension thereof 
            from figure 1B
    ***********************************************************************/
    import delim topshares_adjusted.csv, clear

    keep year top*_equrtrns_v3 top*_preferred
    rename (top*_equrtrns_v3 top*_preferred) (top*share_equrtrns top*share_pref)

    /***********************************************************************
        Merge on top shares from Drew which adjust for Forbes 400 
            in different ways, then plot series
    ***********************************************************************/
    merge 1:1 year using `forbes', keep(3) nogen

    * Move the group tag from prefix to suffix so reshape can use it as j()
    rename top1share_* *_top1
    rename top01share_* *_top01
    rename top001share_* *_top001
    rename top0001share_* *_top0001

    keep equrtrns* rawdina* replaced* adjusted* pref* year

    reshape long equrtrns rawdina replaced adjusted pref, i(year) j(topgrp, string)

    * Human-readable group names; anything unmatched becomes "Top 0.001%"
    replace topgrp = "Top 1%" if topgrp == "_top1"
    replace topgrp = "Top 0.1%" if topgrp == "_top01"
    replace topgrp = "Top 0.01%" if topgrp == "_top001"
    replace topgrp = "Top 0.001%" if !inlist(topgrp, "Top 1%", "Top 0.1%", "Top 0.01%")

    * Lighter shades of the baseline color for the bar chart
    colorpalette "$u1", intensity(0.05(.05)1)
    local u12 = "`r(p11)'"
    local u13 = "`r(p14)'"

    #delimit ;
    graph bar (asis) equrtrns adjusted rawdina replaced
        if year == 2016
                ,
                over(topgrp) bargap(20)
                bar(1, color("$u3")) 
                bar(2, color("$u1")) 
                bar(3, color("`u12'")) 
                bar(4, color("`u13'"))
                $gpr
                ytitle("Wealth Share (%) in 2016")
                legend(order(1 "Equal Returns" 
                             2 "BHV (2019)"
                             3 "Baseline"
                             4 "Replace")
                region(lcolor(white) margin(tiny)) row(1)
                symxsize(*.25)) xsize(8);
    #delimit cr
    graph export forbes_sensitivity_bars.pdf, replace

    * One time-series plot per top group, numbered in loop order
    sort year
    local cnt = 1
    foreach grp in "Top 1%" "Top 0.1%" "Top 0.01%" "Top 0.001%" {
        #delimit ;
        twoway (connect pref year, ms(o) lc("$u1") mc("$u1") lwidth(medthick))
            (connect equrtrns year, ms(s) lc("$u3") mc("$u3"))
            (connect replaced year, ms(oh) lc("$u1") mc("$u1") lp("_"))
            (connect adjusted year, ms(sh) lc("$u1") mc("$u1") lp("--."))
            if topgrp == "`grp'"
            , 
            $gpr
            xtitle("") 
            ytitle("`grp' Wealth Share (%)")
            legend(order(2 "Equal Returns"
                         4 "BHV (2019)"
                         1 "Baseline"
                         3 "Replace")
                    region(lcolor(white) margin(tiny)) row(1)
                    size(small)) xsize(6.5);
        #delimit cr
        graph export "forbes_sensitivity_`cnt'.pdf", replace
        local ++cnt
    }

end/*%>*/

capture program drop table_divsmoreinformative/*%<*/
program define table_divsmoreinformative

/*******************************************************************************
    Appendix Table 10
	Table: Dividends are more informative than capital gains in inferring
        C-corporation holdings
*******************************************************************************/


    /***************************************************************************
        Load data, convert to equal-split, and rank by ES-adjusted capital
            gains income.
    ***************************************************************************/

    use $inputs/scf_revision.dta, clear

    * Pension and vehicle adjustments
    drop networth_pref
    gen networth_pref = networth + funded_pen_db - vehic - durables
    replace hwpen = hwpen - tot_pen_db + funded_pen_db
    replace hwoth = hwoth - veh_inst

    replace kginc = max(-3000, kginc)

    es_rank_scf, rankvar(networth_pref) othersplitvars("kginc divinc ccorw_mutf")

    gen networth_prefXwgt = networth_pref * wgt

    rename ccorw_mutf stockw_preferred
    replace kginc = max(-3000, kginc)

    tempfile regsample
    save `regsample'

    /***************************************************************************
        Run regressions and store output in tempfiles
    ***************************************************************************/

    foreach wgtvar in wgt networth_prefXwgt {

        local wgtname = cond("`wgtvar'" == "wgt", "svywgt", "wlthwgt")

        use `regsample', clear

        reg stockw_preferred divinc kginc i.year [aw = `wgtvar'], nocons
        
        local divcoef = _b[divinc]
        local kgcoef = _b[kginc]
        local divse = _se[divinc]
        local kgse = _se[kginc]
        local N = `e(N)'
        
        nlcom alphabaseline`wgtname':_b[divinc] / (_b[divinc] + _b[kginc]), post

        local alpha_coef = _b[alphabaseline`wgtname']
        local alpha_se = _se[alphabaseline`wgtname']

        clear 
        set obs 7

        gen output = cond(_n < 7 & mod(_n, 2) == 1, "coef", ///
                         cond(_n < 7, "se", "N"))
    
        #delimit ;
        gen variable = cond(inlist(_n, 1, 2), "Capital gains", 
                       cond(inlist(_n, 3, 4), "Dividends", 
                       cond(inlist(_n, 5, 6), "Implied $ \alpha$", "$ N$ (unweighted)")));
        
        gen baseline_`wgtname' = cond(_n == 1, `kgcoef', 
                                 cond(_n == 2, `kgse', 
                                 cond(_n == 3, `divcoef', 
                                 cond(_n == 4, `divse', 
                                 cond(_n == 5, `alpha_coef', 
                                 cond(_n == 6, `alpha_se', `N'))))));
        #delimit cr

        gen rownum = _n

        tempfile baseline`wgtname'
        save `baseline`wgtname''

        foreach restriction in "<=0.99" ">0.99" ">0.999" ">0.9999" {

            #delimit ;
            local outname = cond("`restriction'" == "<=0.99", "bot99", 
                            cond("`restriction'" == ">0.99", "top1", 
                            cond("`restriction'" == ">0.999", "top01", "top001")));
            #delimit cr

            use `regsample', clear

            reg stockw_preferred divinc kginc i.year [aw = `wgtvar'] ///
                if es_rank `restriction', nocons
            
            local divcoef = _b[divinc]
            local kgcoef = _b[kginc]
            local divse = _se[divinc]
            local kgse = _se[kginc]
            local N = `e(N)'
    
            nlcom alpha`outname'`wgtname':_b[divinc] / (_b[divinc] + _b[kginc]), post

            local alpha_coef = _b[alpha`outname'`wgtname']
            local alpha_se = _se[alpha`outname'`wgtname']

            clear 
            set obs 7

            gen output = cond(_n < 7 & mod(_n, 2) == 1, "coef", cond(_n < 7, "se", "N"))

            #delimit ;
            gen variable = cond(inlist(_n, 1, 2), "Capital gains", 
                           cond(inlist(_n, 3, 4), "Dividends", 
                           cond(inlist(_n, 5, 6), "Implied $ \alpha$", "$ N$ (unweighted)")));
            
            gen `outname'_`wgtname' = cond(_n == 1, `kgcoef', 
                                          cond(_n == 2, `kgse', 
                                          cond(_n == 3, `divcoef', 
                                          cond(_n == 4, `divse', 
                                          cond(_n == 5, `alpha_coef', 
                                          cond(_n == 6, `alpha_se', `N'))))));
            #delimit cr
    
            tempfile `outname'`wgtname' 
            save ``outname'`wgtname''
        }
    }

    /***************************************************************************
        Put together different specifications and prepare to make table
    ***************************************************************************/

    use `baselinesvywgt', clear
    qui merge 1:1 variable output using `baselinewlthwgt', assert(3) nogen

    foreach wgt in svywgt wlthwgt {
        foreach grp in bot99 top1 top01 top001 {
            qui merge 1:1 variable output using ``grp'`wgt'', assert(3) nogen
        }
    }

    format baseline_* bot99_* top* %12.3fc
    qui tostring baseline_* bot99_* top*, replace force usedisplayformat

    foreach column of varlist baseline_* bot99_* top* {
        qui replace `column' = "(" + `column' + ")" if output == "se"
        qui replace `column' = subinstr(`column', ".000", "", 1) if output == "N"
    }

    sort rownum
    qui replace variable = "" if output == "se"

    qui gen begtab = "\begin{tabular}{lccccc}" in 1
    qui gen endtab  = "\end{tabular}" in 1

    qui gen toprule = "\toprule" in 1
    qui gen botrule = "\bottomrule" in 1

    qui gen hline = "\hline" in 1
    qui gen cline = "\cline{2-6}" in 1

    qui gen c = " " in 1
    
    #delimit ;
    qui gen tabtitle = "& Full sample & Botttom 90\% & Top 1\% & Top 0.1\% &
                        Top 0.01\%" in 1;
    #delimit cr
 
    qui gen panelA = "\multicolumn{6}{c}{\textit{Panel A. Survey-weighted}}" in 1
    qui gen panelB = "\multicolumn{6}{c}{\textit{Panel B. Wealth $ \times$ Survey-weighted}}" in 1

    qui gen colnumbersA = "& (1) & (2) & (3) & (4) & (5)" in 1
    qui gen colnumbersB = "& (6) & (7) & (8) & (9) & (10)" in 1

    local tabname = "alpha_ccorw_mutf_regressions"

    qui listtex begtab if _n == 1 using "`tabname'.tex", replace rstyle(none)
    qui listtex toprule if _n == 1, appendto("`tabname'.tex") rstyle(none)
    qui listtex hline if _n == 1, appendto("`tabname'.tex") rstyle(none)
    
    qui listtex panelA if _n == 1, appendto("`tabname'.tex") rstyle(tabular)
    qui listtex tabtitle if _n == 1, appendto("`tabname'.tex") rstyle(tabular)
    qui listtex colnumbersA if _n == 1, appendto("`tabname'.tex") rstyle(tabular)
    qui listtex cline if _n == 1, appendto("`tabname'.tex") rstyle(tabular)

    qui listtex variable baseline_svywgt bot99_svywgt top1_svywgt top01_svywgt ///
        top001_svywgt if output != "N", appendto("`tabname'.tex") rstyle(tabular)    

    qui listtex cline if _n == 1, appendto("`tabname'.tex") rstyle(none)      

    qui listtex variable baseline_svywgt bot99_svywgt top1_svywgt top01_svywgt ///
        top001_svywgt if output == "N", appendto("`tabname'.tex") rstyle(tabular) 

    qui listtex botrule if _n == 1, appendto("`tabname'.tex") rstyle(none)  
    
    qui listtex c c c c c c if _n == 1, appendto("`tabname'.tex") rstyle(tabular)  

    qui listtex toprule if _n == 1, appendto("`tabname'.tex") rstyle(none)  

    qui listtex panelB if _n == 1, appendto("`tabname'.tex") rstyle(tabular)
    qui listtex tabtitle if _n == 1, appendto("`tabname'.tex") rstyle(tabular)
    qui listtex colnumbersB if _n == 1, appendto("`tabname'.tex") rstyle(tabular)
    qui listtex cline if _n == 1, appendto("`tabname'.tex") rstyle(tabular)

    qui listtex variable baseline_wlthwgt bot99_wlthwgt top1_wlthwgt top01_wlthwgt ///
        top001_wlthwgt if output != "N", appendto("`tabname'.tex") rstyle(tabular)    

    qui listtex cline if _n == 1, appendto("`tabname'.tex") rstyle(none)      

    qui listtex variable baseline_wlthwgt bot99_wlthwgt top1_wlthwgt top01_wlthwgt ///
        top001_wlthwgt if output == "N", appendto("`tabname'.tex") rstyle(tabular) 

    qui listtex hline if _n == 1, appendto("`tabname'.tex") rstyle(none)  
    qui listtex botrule if _n == 1, appendto("`tabname'.tex") rstyle(none)
    qui listtex endtab if _n == 1, appendto("`tabname'.tex") rstyle(none)
end/*%>*/
****************************************************************************
* Pensions
****************************************************************************
capture program drop graph_pension_age/*%<*/
program define graph_pension_age

    /***************************************************************************
        Appendix Figures 28a and 28b
        The Life Cycle of Pension Wealth vs. Wage Income

        Takes no arguments and replaces the data in memory. Writes
            lifecycle_pen_wage.pdf, wageinc_returns_pensions.pdf, and
            peninc_returns_pensions.pdf to the working directory. Uses the
            globals u1, u3, u4, p2 (colors) and gpr (graph options) and the
            load_analysis_data program, all defined outside this block.
    ***************************************************************************/

    /***********************************************************************
        Load the CPI adjustment factors (per the original note, retrieved
            via FRED) and stash them in a tempfile. The dollar variables
            below are rescaled with adjfactor2016, i.e. to 2016 dollars;
            adjfactor2019 is merged on as well but is not used here.
    ***********************************************************************/
    load_analysis_data cpi
    tempfile cpi
    save `cpi'

    /***********************************************************************
        Load in SCF plus microfile. Adjust total pension wealth, wage
            income, and pension income to 2016 dollars, then keep only
            year, age, the two weights, and those three variables.
    ***********************************************************************/
    load_analysis_data scfrevision_v3

    merge m:1 year using `cpi', keep(3) keepusing(adjfactor2016 adjfactor2019) nogen

    foreach infladjvar of varlist hwpen wageinc peninc {
        replace `infladjvar' = `infladjvar' * adjfactor2016
    }

    keep year age wgt wgt1B hwpen wageinc peninc

    /***********************************************************************
        Assert age top-coded at 95; drop individuals younger than 20.
            Put sample into four age groups (under 45, 45-59, 60-74, and
            75+) as well as five-year age bins. Then save as tempfile.
    ***********************************************************************/

    assert age <= 95 // Assert age is top-coded at 95
    drop if age < 20

    qui gen agegroup = cond(age < 45, 0, cond(age < 60, 1, cond(age < 75, 2, 3)))

    * Five-year bins labelled by their lower bound. The asserts below verify
    * that only age 95 is left unbinned, and it is then folded into bin 90.
    egen age_bin = cut(age), at(20(5)95)

    assert missing(age_bin) if age == 95
    assert !missing(age_bin) if age < 95
    replace age_bin = 90 if age == 95 // so top bin becomes ``90+''

    gen age_bin_midpoints = age_bin + 2.5

    assert !missing(age_bin_midpoints) & !missing(agegroup)

    tempfile scf
    save `scf'

    /***********************************************************************
        Collapse taking mean of total pension wealth over all years  
            and individuals, yielding mean of pension by age group. Save 
            this as tempfile. Have to collapse b/c egen does not allow for 
            weights.
    ***********************************************************************/

    collapse (mean) age_pen = hwpen [aw = wgt], by(agegroup)

    tempfile age_pen
    save `age_pen'

    /***********************************************************************
        Collapse main microfile by age bin and age group. Then merge 
            age-group pension wealth means tempfile onto collapsed data.
    ***********************************************************************/

    use `scf', clear

    collapse (mean) hwpen wageinc peninc age [aw = wgt], by(age_bin_midpoints age_bin agegroup)

    merge m:1 agegroup using `age_pen', assert(3) nogen

    /***********************************************************************
        Scale wealth and income variables into thousands and create 
            wide age_pen variables so we can draw horizontal lines of mean
            pension wealth by age group superimposed on our plot of pension
            wealth by age.
    ***********************************************************************/

    foreach unscaled of varlist hwpen wageinc peninc age_pen {
        replace `unscaled' = `unscaled' / 1000
    }

    * One variable per age group holding that group's mean. It is also filled
    * in on the first bin of the following group (age_bin equal to this
    * group's last bin + 5); for the oldest group no such bin exists.
    sort agegroup age_bin
    forv group = 0 / 3 {
        qui summ age_bin if agegroup == `group'
        local nextbin = `r(max)' + 5

        qui summ age_pen if agegroup == `group'
        local age_pen = `r(mean)'

        gen age_pen`group' = `age_pen' if agegroup == `group' | age_bin == `nextbin'
    }

    * x-coordinates for the group-mean lines: the start of each group's first
    * bin and the end of its last bin; missing for the bins in between.
    rename age_bin age_bin_start
    gen age_bin_end = age_bin_start + 5

    sort agegroup age_bin_start
    by agegroup: gen age_bin_endpoints = age_bin_start if _n == 1
    by agegroup: replace age_bin_endpoints = age_bin_end if _n == _N


    ** Graph
    * The dollar sign in the y-axis title is escaped so that Stata does not
    * try to expand it as a reference to a global macro named 2016.
    #delimit ;
    twoway  (connect hwpen age_bin_midpoints, ms(o) color("$u1")) 
            (connect wageinc age_bin_midpoints, ms(t) color("$u3")) 
            (connect peninc age_bin_midpoints, ms(s) color("$p2")) 
            (line age_pen0 age_bin_endpoints, lc("$u4") lp(dash))
            (line age_pen1 age_bin_endpoints, lc("$u4") lp(dash)) 
            (line age_pen2 age_bin_endpoints, lc("$u4") lp(dash)) 
            (line age_pen3 age_bin_endpoints, lc("$u4") lp(dash))
            , 
            $gpr
            xtitle("Age") ytitle("Means within Age Group (thousands \$2016)") 
            xscale(range(20 97)) xlabel(25(10)95)
            yscale(range(0 402)) ylabel(0(100)400)
            legend(order(1 "Pension (Funded DB + DC) Wealth" 4 "Mean Pension Wealth"
                         2 "Wage Income" 3 "Pension Income") 
                   region(lcolor(white) margin(tiny)) row(1))
            xsize(10);
    #delimit cr
    graph export lifecycle_pen_wage.pdf, replace

    /***************************************************************************
        Flow-Stock Relationships for Pension Wealth Vary with Age
    ***************************************************************************/

    /***********************************************************************
        Reload the prepared SCF microfile saved above and collapse to yield
            aggregates of pension wealth, wage income, and pension income by 
            age group (each divided by 1E9). Compute a "wage return" and
            "pension return" for each group with these aggregates.
            Summarize to yield aggregates of each variable across age
            groups and store these in local macros. Add the aggregate
            returns as a new row, marked with agegroup == -1.
    ***********************************************************************/
    
    use `scf', clear

    collapse (sum) hwpen wageinc peninc [fw = wgt1B], by(agegroup)

    replace hwpen = hwpen / 1E9
    replace wageinc = wageinc / 1E9
    replace peninc = peninc / 1E9

    gen return_pen = 100 * (peninc / hwpen )
    gen return_wage = 100 * (wageinc / hwpen )
    
    foreach aggvar of varlist hwpen wageinc peninc {
        summ `aggvar', meanonly
        local tt`aggvar' = `r(sum)'
    }

    set obs `=_N + 1'
    replace agegroup = -1 if missing(agegroup)
    replace return_pen = 100 * (`ttpeninc' / `tthwpen') if agegroup == -1
    replace return_wage = 100 * (`ttwageinc' / `tthwpen') if agegroup == -1

    /***********************************************************************
       Transpose data in preparation for making graphs; rename 
            transposed data so that variable names make sense. After the
            transpose, v1-v4 are the four age groups in order and v5 is
            the appended all-ages row.
    ***********************************************************************/

    keep agegroup return_*
    xpose, clear varname
    drop if _varname == "agegroup"

    rename (v1 v2 v3 v4 v5 _varname) ///
        (return_20_45 return_45_59 return_60_74 return_75plus return_all flow)

    replace flow = subinstr(flow, "return_", "", 1)

    /***********************************************************************
        Make graphs: one bar chart per flow (wage, pen), with one bar for
            all ages and one per age group. Bars 2-5 take shades from the
            palette that colorpalette leaves in r().
    ***********************************************************************/

    foreach flow in wage pen {
        local flowname = cond("`flow'" == "pen", "Pension", proper("`flow'"))
        local ylab = cond("`flow'" == "pen", "0(2)10", "0(25)100")
        local subfig = cond("`flow'" == "pen", "ii", "i")

        colorpalette "$u3", intensity(0.1(.05)1)

        #delimit ;
        graph bar return_all return_20_45 return_45_59 return_60_74 return_75plus
            if flow == "`flow'"
            ,
            $gpr
            bargap(10) 
            bar(2, color(`r(p3)')) bar(3, color(`r(p9)')) bar(4, color(`r(p14)'))
            bar(5, color(`r(p19)'))
            ytitle("`flowname' income / Pension Wealth (%)") ylab(`ylab')            
            legend(order(1 "All" 2 "Under 45" 3 "45 to 59" 4 "60 to 74" 5 "75 or older")
                region(lc(white) margin(tiny)) row(1))
            xsize(7);
        #delimit cr
        graph export `flow'inc_returns_pensions.pdf, replace
    }

end/*%>*/

capture program drop graph_socialsecurity/*%<*/
program define graph_socialsecurity

    /***************************************************************************
        Appendix Figure 28c
        Wealth concentration after adding Social Security

        Takes no arguments and replaces the data in memory. Writes
            social_security_top01.csv and
            top01share_with_social_security.pdf to the working directory.
            Uses the globals u1, u3 (colors) and gpr (graph options) and
            the load_analysis_data program, all defined outside this block.
    ***************************************************************************/

    /***********************************************************************
        Stage the two Social Security wealth inputs as tempfiles.
            Per the original notes: the first holds collapses (in
            trillions) from Catherine Miller Sarin, the second holds
            top 0.1% Social Security wealth from Sabelhaus Volz.
            NOTE(review): the original notes also mention rescaling, but
            no rescaling happens in this program; presumably it is done
            by load_analysis_data. Confirm.
    ***********************************************************************/

    load_analysis_data sarin
    tempfile ssw_cms
    save `ssw_cms'

    load_analysis_data sabelhaus
    tempfile ssw_shv
    save `ssw_shv'

    /***********************************************************************
        Tax-data wealth series: retain the "All" and "P99.9-100" rows
            and go wide, ending up with total_wealth and top01_wealth
            by year.
    ***********************************************************************/
    load_analysis_data szz

    keep year group hweal_preferred
    keep if group == "All" | group == "P99.9-100"

    replace group = cond(group != "All", "_top01", "_total")

    reshape wide hweal_preferred, i(year) j(group, string)

    rename hweal_preferred_* *_wealth 

    /***********************************************************************
        Bring in the first Social Security series. Every third year from
            1989 through 2016 must match; wealth-only years are
            discarded.
    ***********************************************************************/

    merge 1:1 year using `ssw_cms', assert(1 3)
    assert !(inrange(year, 1989, 2016) & mod(year - 1989, 3) == 0) if _merge == 1
    assert _merge == 3 if mod(year - 1989, 3) == 0 & inrange(year, 1989, 2016)
    keep if _merge == 3
    drop _merge

    /***********************************************************************
        Bring in the second Social Security series. Its only unmatched
            year must be 2019 (discarded); years before 1995 may be
            absent from it and are retained.
    ***********************************************************************/

    merge 1:1 year using `ssw_shv'
    assert year == 2019 if _merge == 2
    assert year < 1995 if _merge == 1
    assert _merge == 3 if inrange(year, 1995, 2016)
    drop if _merge == 2
    drop _merge

    /***********************************************************************
        Top 0.1% shares (in percent): once per Social Security source
            with SSW added to both numerator and denominator, then the
            baseline without SSW.
    ***********************************************************************/

    foreach src in cms shv {
        gen total_wealth_ssw_`src' = total_wealth + total_ssw_`src'
        gen top01_wealth_ssw_`src' = top01_wealth + top01_ssw_`src'

        gen top01sh_ssw_`src' = 100 * (top01_wealth_ssw_`src' / total_wealth_ssw_`src')
    }

    gen top01sh = 100 * (top01_wealth / total_wealth)

    /***********************************************************************
        Export the plotted data, then draw and export the figure
    ***********************************************************************/

    outsheet using social_security_top01.csv, comma replace

    #delimit ;
    twoway
        (connect top01sh_ssw_cms year, lc("$u3") mc("$u3") ms(d))
        (connect top01sh_ssw_shv year, lc("$u3") mc("$u3") ms(dh) lp(-))
        (connect top01sh year, ms(O) lc("$u1") mc("$u1") lw(medthick))
        ,
        $gpr
        ytitle("Top 0.1% Share of Wealth (%)")
        xtitle(" ")
        xlab(1989(3)2016, labsize(small))
        xsca(range(1988 2017))
        ylab(8(2)16)
        ysca(range(7 17))
        legend(
            order(3 "Baseline"
                  1 "+ Social Security (CMS 2020)"
                  2 "+ Social Security (SHV 2020)")
            region(lcolor(white) margin(tiny))
            row(1)
        )
        xsize(8);
    #delimit cr
    graph export "top01share_with_social_security.pdf", replace

end/*%>*/
****************************************************************************
* Housing
****************************************************************************
capture program drop graph_usa_cross/*%<*/
program define graph_usa_cross
    * Appendix Figure 30a
    * Hex map of 2012 state values of proptax_rate (defined as 100 / factor).
    * Also writes the mapped data to wealth_figure7a_v4.csv. Takes no
    * arguments and replaces the data in memory.
    load_analysis_data housing_factors

    drop if year != 2012
    gen proptax_rate = 100/factor

    * Map cut points: the 5th, 25th, 50th, 75th, and 95th percentiles
    summarize proptax_rate, detail
    local cutpoints
    foreach pct in 5 25 50 75 95 {
        local cutpoints `cutpoints' `r(p`pct')'
    }

    keep proptax_rate state*
    export delim using wealth_figure7a_v4.csv, replace

    * colorpalette leaves 20 intensity steps of the base color in r();
    * the first and last of them anchor the map's color range.
    colorpalette "$u1", intensity(0.05(.05)1)
    local lightest "`r(p1)'"
    local darkest "`r(p20)'"

    maptile proptax_rate, ///
        geo(statehex) rangec("`lightest'" "`darkest'") cutv(`cutpoints') ///
        savegraph("factor_cross_states.pdf") replace res(0.5)

end/*%>*/

capture program drop graph_states_overtime/*%<*/
program define graph_states_overtime
    * Appendix Figure 30b
    * Time series of factor_housing and agg_factor_housing for the CA rows,
    * 1975-2016, with a dashed vertical reference line at 1978. Takes no
    * arguments and replaces the data in memory.
    load_analysis_data housing_factors

    keep if inrange(year, 1975, 2016) & state == "CA"

    #delimit ;
    twoway
        (connect factor_housing year, ms(o) lc("$u1") mc("$u1"))
        (connect agg_factor_housing year, ms(s) lc("$u3") mc("$u3"))
        ,
        ytitle("Housing Capitalization Factor")
        xtitle("")
        $gpr
        xsize(7.5)
        xlab(1975(5)2015)
        ysca(range(0 300))
        ylab(0(50)300)
        xline(1978, lc(.8*black) lp(dash))
        legend(
            label(1 "Housing Factor (CA)")
            label(2 "Housing Factor (US)")
            region(lc(white))
        );
    #delimit cr
    graph export "factor_timeseries_CA.pdf", replace

end/*%>*/

********************************************************************************
* SCF Standard Errors
********************************************************************************
capture program drop store_estimates_ses_cis /*%<*/
program define store_estimates_ses_cis
	syntax, outname(string) pointestimate(real) stderror(real) [addtlname(string) scaleby(real 1)]

	* Replaces the data in memory with a single observation holding a scaled
	* point estimate, its scaled standard error, and +/- 1 s.e. and
	* +/- 1.96 s.e. bounds.
	*   outname       stub for the created variables: outname, outname_se,
	*                 outname_1se_lb/ub, and outname_ci95_lb/ub
	*   pointestimate estimate, before scaling
	*   stderror      standard error, before scaling
	*   addtlname     optional; creates a string variable with this name
	*                 whose value is the name itself
	*   scaleby       multiplier applied to estimate and standard error
	*                 (default 1)

	* Hold the scaled estimate in a scalar so each bound reuses the same value
	tempname est
	scalar `est' = `pointestimate' * `scaleby'

	quietly {
		drop _all
		set obs 1

		if "`addtlname'" != "" {
			generate `addtlname' = "`addtlname'"
		}

		generate `outname' = scalar(`est')
		generate `outname'_se = `stderror' * `scaleby'

		* Each entry pairs a variable-name stub with its s.e. multiplier
		foreach band in "1se 1" "ci95 1.96" {
			gettoken stub mult : band
			generate `outname'_`stub'_lb = scalar(`est') - `mult' * `outname'_se
			generate `outname'_`stub'_ub = scalar(`est') + `mult' * `outname'_se
		}
	}
end/*%>*/

capture program drop scf_shares_ses/*%<*/
program define scf_shares_ses, rclass
	syntax varname(numeric), implicateid(varname) topgrp(real) [distribution(string) shareof(string) reps(integer 200) imps(integer 5)]

    /*******************************************************************************
        Top-group share with a standard error combining imputation and
            sampling variability.

        Inputs
            varname        numeric variable whose weighted top-group total is
                           the numerator (must not be abbreviated)
            implicateid    variable whose last digit gives the implicate
                           number; used only when implicatenum does not
                           already exist
            topgrp         size of the top group, in percent
            distribution   variable used to rank observations
                           (default: shareof)
            shareof        variable whose weighted total is the denominator
                           (default: varname)
            reps           number of bootstrap replicates to use (default 200)
            imps           number of implicates to use (default 5)

        The data in memory must also contain the weight x42001 and, for
            each replicate number, the variables mm# and wt1b#.

        Returns r(pointestimate), the mean of the per-implicate shares;
            r(variance) = (imps + 1) / imps * (variance across implicates)
            + (variance across replicates); and r(stderror), its square
            root. Leaves the matrices coefs, VCE_impvar, and VCE_sampvar
            behind. The data in memory on entry are restored on exit.
    *******************************************************************************/

    /*******************************************************************************
        Parse program inputs and save original data as tempfile
    *******************************************************************************/

        * Both variance terms divide by (count - 1)
        if `imps' < 2 | `reps' < 2 {
            di as error "imps() and reps() must each be at least 2"
            exit 198
        }

        * Store variable name
        local varname "`varlist'"

        capture confirm variable `varname', exact
        if _rc > 0 {
            di as error "Do not abbreviate variable name"
            exit 111
        }

        * Parse optional ``share of'' argument
        if "`shareof'" == "" {
            di "No share of variable specified; assuming share of `varname'"
            local shareof = "`varname'"
        }
        else {
            confirm numeric variable `shareof'
        }

        * Parse distribution input
        if "`distribution'" == "" {
            di "No distribution specified; assuming distribution of `shareof'"
            local distribution = "`shareof'"
        }
        else {
            confirm numeric variable `distribution'
        }

        * The numeric topgrp() value is copied into grpperc because the
        * tempvar declaration further down reuses the macro name topgrp.
        local percentile = 1 - (`topgrp' / 100)
        local grpperc = `topgrp'
        assert inrange(`percentile', 0, 1)

        * Store original data
        tempfile original
        qui save `original'

    /*******************************************************************************
        Mimic scfcombo approach for imputation variability
    *******************************************************************************/

        /***************************************************************************
             Prepare data
        ***************************************************************************/

        capture confirm variable implicatenum
        if _rc > 0 {
            gen implicatenum = mod(`implicateid', 10)
            assert inrange(implicatenum, 1, `imps') & implicatenum == int(implicatenum)
        }

        * Rank on the distribution variable using all implicates pooled.
        * NOTE(review): topgrp(0.999) is treated as a special value that
        * selects ranks strictly between 0.99 and 0.999 instead of ranks
        * above the percentile. The bootstrap section below does not apply
        * this special case, and it marks the top group with >= where this
        * section uses >. Both are left as found; confirm they are intended.
        tempvar rank topgrp
        qui cumul `distribution' [aw = x42001], gen(`rank') equal
        if (`grpperc' == 0.999) {
            gen `topgrp' = `rank' > 0.99 & `rank' < 0.999
        }
        else {
            gen `topgrp' = `rank' > `percentile'
        }

        qui gen `varname'_wgtd = `varname' * x42001
        if "`shareof'" != "`varname'" {
            qui gen `shareof'_wgtd = `shareof' * x42001
        }

        sort implicatenum
        qui by implicatenum: egen `shareof'_wgtd_tot = total(`shareof'_wgtd)
        qui by implicatenum: assert `shareof'_wgtd_tot[1] ==  `shareof'_wgtd_tot[_N]

        qui gen `varname'_wgtd_sh = `varname'_wgtd / `shareof'_wgtd_tot

        tempfile impvarorig
        qui save `impvarorig'

        forv implicatenum = 1 / `imps' {

            use `impvarorig', clear

        /***************************************************************************
             Ensure sane aggregate estimate: when the variable is its own
                denominator, shares within an implicate must sum to one
        ***************************************************************************/

            if "`varname'" == "`shareof'" {
                qui summ `varname'_wgtd_sh if implicatenum == `implicatenum'
                local totestimate = `r(sum)'

                assert inrange(`totestimate', 0.9999, 1.0001)
            }

        /***************************************************************************
             Get share for top group
        ***************************************************************************/
            
            qui keep if implicatenum == `implicatenum' & `topgrp' == 1

            gcollapse (sum) totestimate = `varname'_wgtd_sh

            * Stack the per-implicate estimates, one observation each
            if `implicatenum' == 1 {
                tempfile totestimates_impvar
                qui save `totestimates_impvar'
            }
            else {
                append using `totestimates_impvar'

                qui save `totestimates_impvar', replace
            }
        }

        /***************************************************************************
             Store implicate estimates in matrices: coefs holds their mean
                and VCE_impvar their sum of squared deviations, rescaled to
                a variance just below
        ***************************************************************************/

        quietly mat accum VCE_impvar = totestimate, dev nocons means(coefs)
        mat colnames coefs = _cons
        mat rownames VCE_impvar = _cons
        mat colnames VCE_impvar = _cons

        local adj = 1 / (`imps' - 1)
        mat VCE_impvar = VCE_impvar * `adj'

    /*******************************************************************************
         Try to mimic scfcombo approach for sampling variability
    *******************************************************************************/

        /***************************************************************************
             Set up original data file we'll bootstrap through: the first
                implicate only, without the weighted variables built above
        ***************************************************************************/

        use `impvarorig', clear

        drop `varname'_wgtd*

        qui keep if implicatenum == 1

        tempfile orig
        qui save `orig'

        forv repetition = 1 / `reps' {

        /***************************************************************************
             Set up data for each bootstrap repetition: this entails dropping 
                observations with missing MI markers, expanding according to MI
                markers, constructing the weighted share using repetition-
                specific weights, and marking the top group.
        ***************************************************************************/

            qui use `orig', clear

            qui keep if !missing(mm`repetition')
            qui expand mm`repetition'

            qui keep `varname' `shareof' `distribution' wt1b`repetition'

            tempvar rank topgrp
            qui cumul `distribution' [aw = wt1b`repetition'], gen(`rank') equal
            qui gen `topgrp' = `rank' >= `percentile'

            qui gen `varname'_wgtd = `varname' * wt1b`repetition'
            if "`shareof'" != "`varname'" {
                gen `shareof'_wgtd = `shareof' * wt1b`repetition'
            }

            qui egen `shareof'_wgtd_tot = total(`shareof'_wgtd)
            assert `shareof'_wgtd_tot[1] ==  `shareof'_wgtd_tot[_N]

            qui gen `varname'_wgtd_sh = `varname'_wgtd / `shareof'_wgtd_tot

        /***************************************************************************
            Ensure implied aggregate share = 1
        ***************************************************************************/

            if "`shareof'" == "`varname'" {
                qui summ `varname'_wgtd_sh		
                local totestimate = `r(sum)'

                assert inrange(`totestimate', 0.999999, 1.000001)
            }
            
        /***************************************************************************
            Compute desired bootstrap total
        ***************************************************************************/

            qui keep if `topgrp' == 1
            gcollapse (sum) totestimate = `varname'_wgtd_sh

            * Stack the per-replicate estimates, one observation each
            if `repetition' == 1 {
                tempfile totestimate_sampvar
                qui save `totestimate_sampvar'
            }
            else {
                append using `totestimate_sampvar'

                qui save `totestimate_sampvar', replace
            }
        }

        quietly matrix accum VCE_sampvar = totestimate, dev nocons
        matrix rownames VCE_sampvar = _cons
        matrix colnames VCE_sampvar = _cons

        local adj = 1 / (`reps' - 1)

        matrix VCE_sampvar = VCE_sampvar * `adj'

    /*******************************************************************************
        Combine imputation and sampling variance to get actual variance of mean
    *******************************************************************************/

        local adj2 = (`imps' + 1) / `imps'
        mat VCE_impvar =`adj2' * VCE_impvar
        mat VCE_impvar = VCE_impvar + VCE_sampvar

        local pointestimate = coefs[1,1]
        local meanvariance = VCE_impvar[1,1]
        local meanstderror = sqrt(VCE_impvar[1,1])

    /*******************************************************************************
        Return point estimate and standard error; then load original data
    *******************************************************************************/

        return local pointestimate = `"`pointestimate'"'
        return local variance = `"`meanvariance'"' 
        return local stderror = `"`meanstderror'"'

        use `original', clear
end/*%>*/

capture program drop scf_returns_ses/*%<*/
program define scf_returns_ses, rclass

	syntax, stock(string) flow(string) year(integer) implicateid(varname) topgrp(real) distribution(string) [newvar(string) reps(integer 200) imps(integer 5) scaleby(integer 100)]

/*******************************************************************************
	Flow-to-stock ratio for a top group, with a standard error combining
		imputation and sampling variability.

	Inputs
		stock, flow    numeric variables (unabbreviated); the estimate is
		               the weighted top-group total of flow over that of
		               stock, times scaleby. If the pair is not found,
		               flow is retried in upper case.
		year, newvar   accepted for interface compatibility; not used in
		               this program
		implicateid    variable whose last digit gives the implicate
		               number; used only when implicatenum does not
		               already exist
		topgrp         size of the top group, in percent
		distribution   variable used to rank observations
		reps           number of bootstrap replicates to use (default 200)
		imps           number of implicates to use (default 5)
		scaleby        multiplier applied to the ratio (default 100)

	The data in memory must also contain the weight x42001 and, for each
		replicate number, the variables mm# and wt1b#.

	Returns r(pointestimate), the mean of the per-implicate ratios;
		r(variance) = (imps + 1) / imps * (variance across implicates)
		+ (variance across replicates); and r(stderror), its square root.
		Leaves the matrices coefs, VCE_impvar, and VCE_sampvar behind.
		The data in memory on entry are restored on exit.
*******************************************************************************/

/*******************************************************************************
	Parse program inputs and save original data as tempfile
*******************************************************************************/

	* Both variance terms divide by (count - 1)
	if `imps' < 2 | `reps' < 2 {
		noisily display as error "imps() and reps() must each be at least 2"
		exit 198
	}

	* Store variable name
	capture confirm numeric variable `stock' `flow', exact
	if _rc > 0 {
		local flow = upper("`flow'")
	}

	capture confirm numeric variable `stock' `flow', exact
	if _rc > 0 {
		noisily display as error "Stock or flow not found or not numeric; do not abbreviate"
		exit 111
	}
	
	* Define percentile
	local percentile = 1 - (`topgrp' / 100)
	assert inrange(`percentile', 0, 1)

	local ptilename = `percentile' * 100
	display "Calculating returns for P`ptilename'-100"

	* Store original data
	tempfile original
	qui save `original'

/*******************************************************************************
	Mimic scfcombo approach for imputation variability
*******************************************************************************/

	/***************************************************************************
		Prepare data
	***************************************************************************/

	capture confirm variable implicatenum
	if _rc > 0 {
		gen implicatenum = mod(`implicateid', 10)
	}
	
	* Implicates present must be exactly 1 through imps
	qui levelsof implicatenum, local(implicates) clean
	numlist "1/`imps'"
	local expected "`r(numlist)'"
	capture assert "`implicates'" == "`expected'"
	if _rc > 0 {
		noisily di as error "Implicates must be numbered 1-`imps'"
		exit 111
	}

	qui gen `stock'_wgtd = `stock' * x42001
	qui gen `flow'_wgtd = `flow' * x42001

	tempfile impvarorig
	qui save `impvarorig'

	forv implicatenum = 1 / `imps' { // Cycle through implicates

		use `impvarorig', clear

	/***************************************************************************
		Get aggregate estimate. Ranks are computed with all implicates
			pooled; the top-group marker is a tempvar so it cannot collide
			with a variable already in the data.
	***************************************************************************/
		
		tempvar rank intop
		qui cumul `distribution' [aw = x42001], gen(`rank') equal
		qui gen `intop' = `rank' >= `percentile'
		
		qui keep if implicatenum == `implicatenum' & `intop' == 1

		gcollapse (sum) `stock' = `stock'_wgtd `flow' = `flow'_wgtd
		assert _N == 1

		qui gen r = (`flow' / `stock') * `scaleby'

		* Stack the per-implicate estimates, one observation each
		if `implicatenum' == 1 {
			tempfile totestimates_impvar
			qui save `totestimates_impvar'
		}
		else {
			append using `totestimates_impvar'

			qui save `totestimates_impvar', replace
		}
	}

	/***************************************************************************
		Store implicate estimates in matrices: coefs holds their mean and
			VCE_impvar their sum of squared deviations, rescaled to a
			variance just below
	***************************************************************************/
	
	quietly mat accum VCE_impvar = r, dev nocons means(coefs)
	mat colnames coefs = _cons
	mat rownames VCE_impvar = _cons
	mat colnames VCE_impvar = _cons

	local adj = 1 / (`imps' - 1)
	mat VCE_impvar = VCE_impvar * `adj'

/*******************************************************************************
	Try to mimic scfcombo approach for sampling variability
*******************************************************************************/

	/***************************************************************************
		Set up original data file we'll bootstrap through: the first
			implicate only, without the weighted variables built above
	***************************************************************************/

	use `impvarorig', clear
	
	drop `stock'_wgtd* `flow'_wgtd*

	qui keep if implicatenum == 1

	tempfile orig
	qui save `orig'
	
	forv repetition = 1 / `reps' {

	/***************************************************************************
		Set up data for each bootstrap repetition: this entails dropping 
			observations with missing MI markers, expanding according to MI
			markers, constructing weighted stock and flow using repetition-
			specific weights, and marking the top group.
	***************************************************************************/

		qui use `orig', clear

		qui keep if !missing(mm`repetition')
		qui expand mm`repetition'

		qui keep `stock' `flow' `distribution' wt1b`repetition'

		tempvar rank intop
		qui cumul `distribution' [aw = wt1b`repetition'], gen(`rank') equal
		qui gen `intop' = `rank' >= `percentile'

		qui gen `stock'_wgtd = `stock' * wt1b`repetition'
		qui gen `flow'_wgtd = `flow' * wt1b`repetition'
		
	/***************************************************************************
		Compute desired bootstrap total
	***************************************************************************/

		qui keep if `intop' == 1
		gcollapse (sum) `stock' = `stock'_wgtd `flow' = `flow'_wgtd
		assert _N == 1

		qui gen r = (`flow' / `stock') * `scaleby'

		* Stack the per-replicate estimates, one observation each
		if `repetition' == 1 {
			tempfile totestimate_sampvar
			qui save `totestimate_sampvar'
		}
		else {
			append using `totestimate_sampvar'

			qui save `totestimate_sampvar', replace
		}
	}

	quietly matrix accum VCE_sampvar = r, dev nocons
	matrix rownames VCE_sampvar = _cons
	matrix colnames VCE_sampvar = _cons

	local adj = 1 / (`reps' - 1)

	matrix VCE_sampvar = VCE_sampvar * `adj'

/*******************************************************************************
	Combine imputation and sampling variance to get actual variance of mean
*******************************************************************************/

	local adj2 = (`imps' + 1) / `imps'
	mat VCE_impvar =`adj2' * VCE_impvar
	mat VCE_impvar = VCE_impvar + VCE_sampvar

	local pointestimate = coefs[1,1]
	local meanvariance = VCE_impvar[1,1]
	local meanstderror = sqrt(VCE_impvar[1,1])

/*******************************************************************************
	Return point estimate and standard error; then load original data
*******************************************************************************/

	return local pointestimate = `"`pointestimate'"'
	return local variance = `"`meanvariance'"' 
	return local stderror = `"`meanstderror'"'

	use `original', clear
end/*%>*/

capture program drop scf_returnratios_ses/*%<*/
program define scf_returnratios_ses, rclass
	syntax, stock(string) flow(string) year(integer) implicateid(varname) topgrp(real) distribution(string) [newvar(string) reps(integer 200) imps(integer 5) scaleby(integer 100)]

/*******************************************************************************
	scf_returnratios_ses: ratio of the top group's return to the aggregate
		return, with a standard error combining imputation and sampling
		variability.

	For observations at or above percentile (1 - topgrp/100) of the weighted
	distribution variable, the program computes
		r       = (sum of weighted flow / sum of weighted stock) * scaleby
		r_macro = the same ratio over all observations
		r_ratio = r / r_macro
	once per implicate (weight x42001) and once per replicate weight
	(wt1b1-wt1b200 on implicate 1, expanded by mm1-mm200).

	Returns (r-class locals):
		r(pointestimate)  mean of r_ratio across the five implicates
		r(variance)       (6/5) * variance across implicates
		                  + variance across the 200 replicates
		r(stderror)       square root of r(variance)

	Notes:
		- The data in memory on entry are saved and reloaded on exit.
		- year(), newvar(), reps() and imps() are parsed but not referenced
		  below; the implicate and replicate counts are fixed at 5 and 200.
		- Uses gcollapse.
*******************************************************************************/

/*******************************************************************************
	Parse program inputs and save original data as tempfile
*******************************************************************************/

	* Store variable name: if the names are not found as typed, retry with
	* the flow name in upper case before giving up
	capture confirm numeric variable `stock' `flow', exact
	if _rc > 0 {
		local flow = upper("`flow'")
	}

	capture confirm numeric variable `stock' `flow', exact
	if _rc > 0 {
		noisily display as error "Stock or flow not found or not numeric; do not abbreviate"
		exit 111
	}
	
	* Define percentile
	local percentile = 1 - (`topgrp' / 100)
	assert inrange(`percentile', 0, 1)

	local ptilename = `percentile' * 100
	display "Calculating returns for P`ptilename'-100"

	* Store original data
	tempfile original
	qui save `original'

/*******************************************************************************
	Mimic scfcombo approach for imputation variability
*******************************************************************************/

	/***************************************************************************
		Prepare data
	***************************************************************************/

	capture confirm variable implicatenum
	if _rc > 0 {
		gen implicatenum = mod(`implicateid', 10)
	}
	
	qui levelsof implicatenum, local(implicates) clean
	capture assert "`implicates'" == "1 2 3 4 5"
	if _rc > 0 {
		noisily di as error "Implicates must be numbered 1-5"
		* Hand the caller back untouched data (implicatenum may have been added)
		use `original', clear
		exit 111
	}

	qui gen `stock'_wgtd = `stock' * x42001
	qui gen `flow'_wgtd = `flow' * x42001

	tempfile impvarorig
	qui save `impvarorig'

	forv implicatenum = 1 / 5 { // Cycle through implicates

		use `impvarorig', clear

	/***************************************************************************
		Get aggregate estimate
	***************************************************************************/
		
		* Rank is computed on the pooled implicates, before subsetting
		tempvar rank
		qui cumul `distribution' [aw = x42001], gen(`rank') equal

		* Missing compares greater than any number in Stata, so an unranked
		* observation must be kept out of the top group explicitly
		qui gen topgrp = (`rank' >= `percentile') & !missing(`rank')
		
		qui keep if implicatenum == `implicatenum' 

		gcollapse (sum) `stock' = `stock'_wgtd `flow' = `flow'_wgtd, by(topgrp)
		assert _N == 2
		egen total_`stock' = total(`stock')
		egen total_`flow' = total(`flow')
		keep if topgrp == 1

		gen r = (`flow' / `stock') * `scaleby'
		gen r_macro = (total_`flow' / total_`stock') * `scaleby'
		gen r_ratio = r / r_macro

		* Stack one row per implicate
		if `implicatenum' == 1 {
			tempfile totestimates_impvar
			qui save `totestimates_impvar'
		}
		else {
			append using `totestimates_impvar'

			qui save `totestimates_impvar', replace
		}
	}

	/***************************************************************************
		Store implicate estimates in matrices: coefs holds the mean of r_ratio,
			VCE_impvar its variance across implicates
	***************************************************************************/
	
	quietly mat accum VCE_impvar = r_ratio, dev nocons means(coefs)
	mat colnames coefs = _cons
	mat rownames VCE_impvar = _cons
	mat colnames VCE_impvar = _cons

	local adj = 1 / (5 - 1)
	mat VCE_impvar = VCE_impvar * `adj'

/*******************************************************************************
	Try to mimic scfcombo approach for sampling variability
*******************************************************************************/

	/***************************************************************************
		Set up original data file we'll bootstrap through
	***************************************************************************/

	use `impvarorig', clear
	
	drop `stock'_wgtd* `flow'_wgtd*

	qui keep if implicatenum == 1

	tempfile orig
	qui save `orig'
	
	forv repetition = 1 / 200 {

	/***************************************************************************
		Set up data for each bootstrap repetition: this entails dropping 
			observations with missing MI markers, expanding according to MI
			markers, constructing weighted stock and flow using repetition-
			specific weights, and marking the top group.
	***************************************************************************/

		qui use `orig', clear

		qui keep if !missing(mm`repetition')
		qui expand mm`repetition'

		qui keep `stock' `flow' `distribution' wt1b`repetition'

		tempvar rank
		qui cumul `distribution' [aw = wt1b`repetition'], gen(`rank') equal

		* Same missing-rank guard as in the implicate loop
		qui gen topgrp = (`rank' >= `percentile') & !missing(`rank')

		qui gen `stock'_wgtd = `stock' * wt1b`repetition'
		qui gen `flow'_wgtd = `flow' * wt1b`repetition'
		
	/***************************************************************************
		Compute desired bootstrap total
	***************************************************************************/

		gcollapse (sum) `stock' = `stock'_wgtd `flow' = `flow'_wgtd, by(topgrp)
		assert _N == 2
		egen total_`stock' = total(`stock')
		egen total_`flow' = total(`flow')
		keep if topgrp == 1

		gen r = (`flow' / `stock') * `scaleby'
		gen r_macro = (total_`flow' / total_`stock') * `scaleby'
		gen r_ratio = r / r_macro

		* Stack one row per repetition
		if `repetition' == 1 {
			tempfile totestimate_sampvar
			qui save `totestimate_sampvar'
		}
		else {
			append using `totestimate_sampvar'

			qui save `totestimate_sampvar', replace
		}
	}

	quietly matrix accum VCE_sampvar = r_ratio, dev nocons
	matrix rownames VCE_sampvar = _cons
	matrix colnames VCE_sampvar = _cons

	local adj = 1 / (200 - 1)

	matrix VCE_sampvar = VCE_sampvar * `adj'

/*******************************************************************************
	Combine imputation and sampling variance to get actual variance of mean
*******************************************************************************/

	local adj2 = (5 + 1) / 5
	mat VCE_impvar =`adj2' * VCE_impvar
	mat VCE_impvar = VCE_impvar + VCE_sampvar

	local pointestimate = coefs[1,1]
	local meanvariance = VCE_impvar[1,1]
	local meanstderror = sqrt(VCE_impvar[1,1])

/*******************************************************************************
	Return point estimate and standard error; then load original data
*******************************************************************************/

	return local pointestimate = `"`pointestimate'"'
	return local variance = `"`meanvariance'"' 
	return local stderror = `"`meanstderror'"'

	use `original', clear
end/*%>*/

capture program drop load_scf_with_replicate_weights/*%<*/
program define load_scf_with_replicate_weights

	syntax, year(integer) [prepforscfcombo(string) changecaseid(string) changeimplicateid(string) dbwealth(string)]

	/***************************************************************************
		load_scf_with_replicate_weights: replace the data in memory with the
			SCF bulletin extract for one survey year, merged with replicate
			weights and the full data file.

		Options:
			year()              survey year; must be 1989, 1992, ..., 2019
			prepforscfcombo()   if given, name of a new variable holding
			                    mod(implicate id, 10), asserted to be 1-5;
			                    wgt is also renamed to wgt0
			changecaseid()      if given, new name for the case id variable
			changeimplicateid() if given, new name for the implicate id variable
			dbwealth()          yes, y, t or true (any case) merges the DB
			                    pension file and creates tot_pen_db

		Steps: load $inputs/rscfpYYYY.dta, divide asset and income variables
			by the adjustment factors, merge replicate weights (wt1b, mm),
			merge the full file, then lower-case id, X, WT and MM names.
	***************************************************************************/

	/***************************************************************************
		Parse syntax
	***************************************************************************/

	if !inrange(`year', 1989, 2019) | mod(`year' - 1989, 3) != 0 {
		di as error "Invalid SCF year"
		exit 111
	}

	/***************************************************************************
		Load bulletin data and adjust to current dollars
	***************************************************************************/

		/***********************************************************************
			Load bulletin data
		***********************************************************************/

	* Two-digit year drives the file names; 2001 replicate file is named differently
	local yr = substr("`year'", 3, 2)
	local replicatefile = cond(`year' == 2001, "scf2001rw1s", "p`yr'_rw1")

	use $inputs/rscfp`year'.dta, clear

	if "`prepforscfcombo'" != "" {
		rename wgt wgt0 // Need to rename for scfcombo to work, for some reason
	}

		/***********************************************************************
			Merge on adjustment factors and adjust to current dollars
		***********************************************************************/

	qui gen year = `year' // For merge
	qui merge m:1 year using $inputs/scf2019infladjfactors.dta, ///
		keepusing(assetadjfactor incadjfactor) assert(2 3) keep(3) nogen
	drop year

	foreach assetvar of varlist asset fin liq cds saving nmmf stocks bond retqliq ///
		savbnd cashli othma othfin checking mma call nfin vehic houses oresre nnresre ///
		bus othnfin debt mrthel resdbt othloc ccbal install odebt networth homeeq ///
		ploan1-ploan8 lloan1-lloan12 kgtotal equity deq vlease reteq veh_inst edn_inst ///
		oth_inst heloc nh_mort hdebt kghouse kgore kgbus kgstmf prepaid actbus nonactbus ///
		farmbus_kg mort? futpen currpen annuit trusts farmbus nhnfin irakh thrift mmda ///
		mmmf stmutf tfbmutf gbmutf obmutf comutf omutf notxbnd mortbnd govtbnd obnd {

		qui replace `assetvar' = `assetvar' / assetadjfactor
	}

	foreach incvar of varlist income wageinc bussefarminc intdivinc kginc ///
		ssretinc transfothinc tpay rent payloc? payhi? paylc? payore? payveh? ///
		payveo? payedu? payiln? paymarg payins paypen? revpay mortpay conspay ///
		penacctwd {

		qui replace `incvar' = `incvar' / incadjfactor
	}
	drop assetadjfactor incadjfactor

	/***************************************************************************
		Merge on replicate weights
	***************************************************************************/

	* Id names are switched to the case used as the merge key for this year:
	* upper-case X ids for 1989, upper-case Y ids for most years, lower-case
	* for 2001 and 2019
	if `year' == 1989 {
		rename x1 xx1, upper
		local caseid = "XX1"
		local implicateid = "X1"
	}
	else if !inlist(`year', 2001, 2019) {
		rename y1 yy1, upper
		local caseid = "YY1"
		local implicateid = "Y1"
	}
	else {
		local caseid = "yy1"
		local implicateid = "y1"	
	}

	local keepvars = cond(inlist(`year', 2001, 2019), "wt1b1-wt1b200 mm200-mm1", "WT1B1-WT1B200 MM200-MM1")
	qui merge m:1 `caseid' using $inputs/scf_replicate_weights/`replicatefile'.dta, ///
		keepusing(`keepvars') assert(3) nogen

	* 2001 switches to upper-case ids only after the replicate-weight merge
	if `year' == 2001 {
		rename y1 yy1, upper
		local caseid = "YY1"
		local implicateid = "Y1"	
	}

	/***************************************************************************
		Merge on full file
	***************************************************************************/

	qui merge m:1 `implicateid' `caseid' using $inputs/complete_data/p`yr'i6.dta, assert(3) nogen

	/***************************************************************************
		If ``prep for scfcombo'' is specified, create ID 1-5 which will be
			necessary for the scfcombo program
	***************************************************************************/

	if "`prepforscfcombo'" != "" {
		qui gen `prepforscfcombo' = mod(`implicateid', 10)
		assert inrange(`prepforscfcombo', 1, 5)
	} 

	/***************************************************************************
		Merge on DB wealth if option specified
	***************************************************************************/

	local dbwealth = lower("`dbwealth'")
	if inlist("`dbwealth'", "yes", "y", "t", "true") {
		qui gen year = `year'
		qui rename `implicateid' y1 // for merge

		qui merge 1:1 y1 year using "$inputs/DB_household092820.dta", ///
			assert(2 3) keep(3) keepusing(*_dbamt_*) nogen
		qui rename y1 `implicateid'

		qui egen tot_pen_db = rowtotal(*_dbamt_*)
		qui drop *_dbamt_* year
	}

	/***************************************************************************
		Rename some variables, to taste
	***************************************************************************/

	* From here on the id variables and the locals naming them are lower-case
	qui rename `implicateid' `caseid', lower
	local implicateid = lower("`implicateid'")
	local caseid = lower("`caseid'")
	
	if !inlist(`year', 2001, 2019) {
		qui rename X* WT* MM*, lower
	}
	else if `year' == 2001 {
		qui rename (X6841 X6826 X42001) (x6841 x6826 x42001)
	}
	else {
		confirm variable x42001
	}
	
	* Optional caller-chosen id names. The ids were lower-cased just above, so
	* a single rename suffices; it errors if the id variable is absent
	if "`changecaseid'" != "" {
		qui rename `caseid' `changecaseid'
		local caseid = "`changecaseid'"
	}
	if "`changeimplicateid'" != "" {
		qui rename `implicateid' `changeimplicateid'
		local implicateid = "`changeimplicateid'"
	}

	/***************************************************************************
		Put implicate number next to other identification numbers
	***************************************************************************/

	if "`prepforscfcombo'" != "" {
		order `implicateid' `caseid' `prepforscfcombo'
	}

end/*%>*/

capture program drop scf_stderrors_build/*%<*/
program define scf_stderrors_build

    /*******************************************************************************
        scf_stderrors_build: build the SCF top wealth share files.

        1. From $inputs/scf_revision.dta, compute top 1%, 0.1% and 0.01% net
           worth shares by year under the raw and preferred definitions and
           save them to $inputs/scftopshares20220112.dta.
        2. For each SCF year, definition and top group, call scf_shares_ses
           and store_estimates_ses_cis and save one file per combination
           in $dumpdir.
        3. Append and merge per-year files into
           $dumpdir/scftopshares_stderrors20220112.dta.
    *******************************************************************************/

    /*******************************************************************************
        For reference, calculate top shares in raw data and under our preferred 
            definition
    *******************************************************************************/

    use $inputs/scf_revision.dta, clear

    rename networth networth_raw
    
    * QJE 3 tweaks
    rename networth_pref networth_supple
    gen networth_pref = networth_raw + funded_pen_db - vehic - durables
    replace hwpen = hwpen - tot_pen_db + funded_pen_db
    replace hwoth = hwoth - veh_inst

    qui cumul networth_raw [aw = wgt], by(year) gen(rawrank)
    qui cumul networth_pref [aw = wgt], by(year) gen(prefrank)

    * bottom 99% = 1, P99-99.9 = 2, P99.9-99.99 = 3, top 0.01% = 4
    gen rawgrp = cond(rawrank < 0.99, 1, cond(rawrank < 0.999, 2, cond(rawrank < 0.9999, 3, 4)))
    gen prefgrp = cond(prefrank < 0.99, 1, cond(prefrank < 0.999, 2, cond(prefrank < 0.9999, 3, 4)))

    tempfile orig
    save `orig'

    foreach rankspec in raw pref {

        use `orig', clear

        collapse (sum) networth_`rankspec' [fw = wgt1B], by(year `rankspec'grp)

        reshape wide networth_`rankspec', i(year) j(`rankspec'grp)

        egen networth_`rankspec'_total = rowtotal(networth_`rankspec'?)

        rename (*1 *2 *3 *4) ///
            (*_bot99`rankspec' *_p99_99pt9`rankspec' *_p99pt9_99pt99`rankspec' *_t001`rankspec')

        * Cumulate the disjoint groups into nested top groups. Variables are
        * referenced by their full names so this does not depend on variable
        * abbreviation being switched on.
        gen networth_`rankspec'_t01 = networth_`rankspec'_p99pt9_99pt99`rankspec' + ///
                                      networth_`rankspec'_t001`rankspec'
        gen networth_`rankspec'_t1 = networth_`rankspec'_p99_99pt9`rankspec' + ///
                                     networth_`rankspec'_t01

        keep year *_t* networth_`rankspec'_total

        * Give the top 0.01% total the same naming pattern as t01 and t1
        rename networth_`rankspec'_t001`rankspec' networth_`rankspec'_t001

        assert networth_`rankspec'_t001 < networth_`rankspec'_t01 
        assert networth_`rankspec'_t01 < networth_`rankspec'_t1 

        tempfile `rankspec'
        save ``rankspec''
    }

    use `raw', clear
    merge 1:1 year using `pref', assert(3) nogen

    foreach topgrp in t1 t01 t001 {
        gen `topgrp'share_raw = networth_raw_`topgrp' / networth_raw_total
        gen `topgrp'share_pref = networth_pref_`topgrp' / networth_pref_total
    }

    keep year *share_raw *share_pref

    save $inputs/scftopshares20220112.dta, replace

    /*******************************************************************************
         Use programs to calculate top shares with standard errors
    *******************************************************************************/

        /***************************************************************************
             Cycling through years, make calculations and save in separate	/*%<*/
                files
        ***************************************************************************/

    forv year = 1989(3)2019 { 
        di `year'
        foreach rankspec in raw pref {
            di "`rankspec'"
            load_scf_with_replicate_weights, year(`year') changecaseid("YY1") ///
                changeimplicateid("Y1") prepforscfcombo("implicatenum")
            
            qui gen year = `year'

            if "`rankspec'" == "raw" {
                rename networth networth_raw
            }
            else {
                qui merge 1:1 year Y1 YY1 using $inputs/scf_revision.dta, assert(2 3) ///
                    keep(3) keepusing(networth funded_pen_db vehic durables) nogen

                * QJE v3
                gen networth_pref = networth + funded_pen_db - vehic - durables
                drop networth funded_pen_db vehic durables 
            }

            tempfile orig`year'`rankspec'
            qui save `orig`year'`rankspec''
            
            * topgrp is the top group size in percent: 1, 0.1 or 0.01
            foreach topgrp of numlist 1 0.1 0.01 {
                di `topgrp'
                local outname = cond(`topgrp' == 1, "t1", cond(`topgrp' == 0.1, "t01", "t001"))
                
                use `orig`year'`rankspec'', clear
                
                scf_shares_ses networth_`rankspec', implicateid(Y1) topgrp(`topgrp') reps(500)
                
                store_estimates_ses_cis, outname("`outname'share_`rankspec'") ///
                    pointestimate(`r(pointestimate)') stderror(`r(stderror)') 
                
                gen year = `year'
                
                qui save "$dumpdir/scf`outname'share_`rankspec'`year'EZ.dta", replace
            }
        }
    }/*%>*/

	/***************************************************************************
		Append and merge files/*%<*/
	***************************************************************************/

    clear

	*From stata/dump
    * NOTE(review): the loop above writes to $dumpdir with an EZ suffix, while
    * the files read here come from $inputs without that suffix. Confirm this
    * is intended (e.g. the $inputs files are copies of an earlier dump).
    forv year = 1989(3)2019 {
        append using "$inputs/scft1share_raw`year'.dta"

        * With update, the first year matches fully (3); later years fill in
        * missing values of the newly appended row (4)
        local mergecode = cond(`year' == 1989, 3, 4)

        merge 1:1 year using "$inputs/scft01share_raw`year'.dta", assert(1 `mergecode') update nogen
        merge 1:1 year using "$inputs/scft001share_raw`year'.dta", assert(1 `mergecode') update nogen
        merge 1:1 year using "$inputs/scft1share_pref`year'.dta", assert(1 `mergecode') update nogen
        merge 1:1 year using "$inputs/scft01share_pref`year'.dta", assert(1 `mergecode') update nogen
        merge 1:1 year using "$inputs/scft001share_pref`year'.dta", assert(1 `mergecode') update nogen
    }

    order year

    save $dumpdir/scftopshares_stderrors20220112.dta, replace

end/*%>*/

capture program drop scf_stderrors/*%<*/
program define scf_stderrors

    /*******************************************************************************
        scf_stderrors: for each top share variable, graph the directly
            calculated share (suffix _check) against the point estimate and
            95% CI bounds from the standard-error file, and export one PDF
            per variable.
    *******************************************************************************/

    * Runs very slowly
    scf_stderrors_build

    use $inputs/scftopshares20220112.dta, clear

    * Remember the share variable names before tagging them as _check
    ds t*share_*
    local checklist = "`r(varlist)'"

    rename t*share_* t*share_*_check

    merge 1:1 year using $dumpdir/scftopshares_stderrors20220112.dta, assert(3) nogen

    foreach sharevar of varlist `checklist' {

        * Labels are derived from the variable being plotted (names look like
        * t1share_raw, t01share_pref, t001share_raw)
        local grplab = cond(regexm("`sharevar'", "^t1share"), "Top 1%", ///
                       cond(regexm("`sharevar'", "^t01share"), "Top 0.1%", "Top 0.01%"))

        local speclab = cond(regexm("`sharevar'", "raw"), "raw", "preferred")

        #delimit ;
        twoway (connect `sharevar'_check year, ms(X) color("$u3"))
            (connect `sharevar' year, ms(o) color("$u1"))
            (connect `sharevar'_ci95_lb year, ms(none) color("$u1") lp("."))
            (connect `sharevar'_ci95_ub year, ms(none) color("$u1") lp("."))
            ,
            $gpr
            title("`grplab' share (%) SCF wealth, `speclab' wealth definition", 
                color(black) size(medsmall) position(11) ring(1))
            ytitle("") xtitle("")
            legend(order(2 "Point estimate" 3 "95% CI bounds" 1 "Calculated from raw") 
                region(lcolor(white) margin(tiny)) row(1));
        #delimit cr
        graph export "`sharevar'_ci95.pdf", replace
    }

end/*%>*/

capture program drop scf_stderrors_bus_build/*%<*/
program define scf_stderrors_bus_build

    /*******************************************************************************
        scf_stderrors_bus_build: build the SCF business wealth share files.

        1. From $inputs/scf_revision.dta, compute by year the top-group totals
           of bus, pthru, ccorw and hwbus as shares of total preferred net
           worth and save them to $dumpdir/scfbizshares20220112.dta.
        2. For each SCF year, top group and component, call scf_shares_ses and
           store_estimates_ses_cis and save one file per combination
           in $dumpdir.
        3. For each component, append and merge the per-year files into
           $dumpdir/scftopshares_COMPONENT_stderrors20220112.dta.
    *******************************************************************************/

    /*******************************************************************************
        For reference, calculate top shares under our preferred definition
    *******************************************************************************/

    use $inputs/scf_revision.dta, clear

    * QJE v3
    drop networth_pref
    gen networth_pref = networth + funded_pen_db - vehic - durables
    replace hwpen = hwpen - tot_pen_db + funded_pen_db
    replace hwoth = hwoth - veh_inst

    tu_rank_scf, rankvar(networth_pref) outname(wlthrank_tu)

    * bottom 99% = 0, P99-99.9 = 1, top 0.1% = 2
    gen prefgrp = cond(wlthrank_tu < 0.99, 0, cond(wlthrank_tu < 0.999, 1, 2))

    tempfile orig
    save `orig'

    foreach rankspec in pref {

        use `orig', clear

        collapse (sum) bus pthru ccorw hwbus networth_`rankspec' [fw = wgt1B], by(year `rankspec'grp)

        reshape wide bus pthru ccorw hwbus networth_`rankspec', i(year) j(`rankspec'grp)

        rename (*0 *1 *2) (*_bot99`rankspec' *_p99_99pt9`rankspec' *_top01`rankspec')

        foreach collapsevar in bus pthru ccorw hwbus networth_`rankspec' {
            gen `collapsevar'_top1`rankspec' = `collapsevar'_p99_99pt9`rankspec' + ///
                                               `collapsevar'_top01`rankspec'

            gen `collapsevar'_`rankspec'total = `collapsevar'_bot99`rankspec' + ///
                                                `collapsevar'_p99_99pt9`rankspec' + ///
                                                `collapsevar'_top01`rankspec'
        }
        rename networth_pref_preftotal networth_pref_total

        tempfile `rankspec'
        save ``rankspec''
    }

    use `pref', clear

    * Every share uses total preferred net worth as its denominator
    foreach topgrp in top1 top01 p99_99pt9 {
        gen `topgrp'share_pref = networth_pref_`topgrp'pref / networth_pref_total
        foreach v in bus pthru ccorw hwbus {
            gen `topgrp'`v'share_pref = `v'_`topgrp'pref / networth_pref_total
        }
    }

    keep year *share_pref bus*pref pthru*pref ccorw*pref hwbus*pref

    save $dumpdir/scfbizshares20220112.dta, replace

    /*******************************************************************************
         Use programs to calculate biz shares with standard errors
    *******************************************************************************/

        /***************************************************************************
            Cycling through years, make calculations and save in separate	/*%<*/
                files
        ***************************************************************************/

    forv year = 1989(3)2019 { 
        di `year'
        foreach rankspec in  pref {
            di "`rankspec'"
            load_scf_with_replicate_weights, year(`year') changecaseid("YY1") ///
                changeimplicateid("Y1") prepforscfcombo("implicatenum")
            
            qui gen year = `year'

            if "`rankspec'" == "raw" {
                rename networth networth_raw
            }
            else {
                * Same revision file as loaded at the top of this program
                qui merge 1:1 year Y1 YY1 using $inputs/scf_revision.dta, assert(2 3) ///
                    keep(3) keepusing(networth funded_pen_db vehic durables pthru ccorw hwbus) nogen

                * QJE v3
                gen networth_pref = networth + funded_pen_db - vehic - durables
                drop networth funded_pen_db vehic durables
            }

            tempfile orig`year'`rankspec'
            qui save `orig`year'`rankspec''
            
            * NOTE(review): 1 and 0.1 label the top 1% and top 0.1%; 0.999 is
            * mapped to the P99-99.9 output name. How scf_shares_ses treats
            * topgrp(0.999) is not visible here -- confirm.
            foreach topgrp of numlist 1 0.1 0.999 {
                di `topgrp'
                local outname = cond(`topgrp' == 1, "top1", cond(`topgrp' ==  0.1, "top01", "p99_99pt9"))
                
                foreach v in bus pthru ccorw hwbus {
                    use `orig`year'`rankspec'', clear
                    scf_shares_ses `v', shareof(networth_`rankspec') implicateid(Y1) topgrp(`topgrp') reps(500)
                    
                    * The estimate name does not include the component; the
                    * component is only encoded in the file name below
                    store_estimates_ses_cis, outname("`outname'share_`rankspec'") ///
                        pointestimate(`r(pointestimate)') stderror(`r(stderror)') 
                
                    gen year = `year'
                    
                    qui save "$dumpdir/scf`outname'`v'share_`rankspec'`year'EZ.dta", replace
                }
            }
        }
    }
    /*%>*/

	/***************************************************************************
		Append and merge files/*%<*/
	***************************************************************************/

    foreach v in bus hwbus pthru ccorw {
        clear

        forv year = 1989(3)2019 {
            append using "$dumpdir/scftop01`v'share_pref`year'EZ.dta"

            * With update, the first year matches fully (3); later years fill
            * in missing values of the newly appended row (4)
            local mergecode = cond(`year' == 1989, 3, 4)

            merge 1:1 year using "$dumpdir/scftop1`v'share_pref`year'EZ.dta", assert(1 `mergecode') update nogen
            merge 1:1 year using "$dumpdir/scfp99_99pt9`v'share_pref`year'EZ.dta", assert(1 `mergecode') update nogen
        }

        order year

        save $dumpdir/scftopshares_`v'_stderrors20220112.dta, replace
    }
    /*%>*/

end/*%>*/

capture program drop scf_stderrors_bus/*%<*/
program define scf_stderrors_bus
	
	*Appendix Figure 35
	* Stacks the per-component standard-error files, rescales the estimates
	* by 100, and exports one share-with-95%-CI graph per component and
	* wealth group.
	
    * Runs very slowly
    scf_stderrors_bus_build

    * Stack the four component files, tagging each row with its component
    use $dumpdir/scftopshares_bus_stderrors20220112.dta, clear
    gen var = "bus"
    foreach component in pthru ccorw hwbus {
        append using $dumpdir/scftopshares_`component'_stderrors20220112.dta
        replace var = "`component'" if var == ""
    }
    
    * Rescale every estimate and bound by 100
    foreach estimate of varlist top* p* {
        replace `estimate' = 100 * `estimate'
    }

    * Same y-axis ticks for every wealth group
    local yticks "2(2)16"

    foreach component in "hwbus" "pthru" "ccorw" {
        foreach group in "top01" "top1" "p99_99pt9" {

            #delimit ;
            twoway 
                (connect `group'share_pref year, ms(o) mc("$u1") lc("$u1"))
                (connect `group'share_pref_ci95_lb year, ms(none) mc("$u1") lc("$u1") lp("-"))
                (connect `group'share_pref_ci95_ub year, ms(none) mc("$u1") lc("$u1") lp("-"))
                if var == "`component'"
                ,
                $gpr
                ytitle("Share of Total Household Wealth (%)") xtitle("")
                xlab(1989(3)2019) xsca(range(1988 2020))
                ylab(`yticks')
                legend(order(
                             1 "SCF Component" 2 "95% CI" ) 
                    region(lcolor(white) margin(tiny)) row(1));
            #delimit cr
            graph export "`group'share_`component'_ci95.pdf", replace
        }
    }

end/*%>*/

capture program drop estate_stderrors_int/*%<*/
program define estate_stderrors_int
	*Appendix Figure 5
    * Reads $inputs/sw_returns_collapse.csv, rescales the interest-rate
    * variables by 100, blanks selected group-years, and exports one graph
    * with 95% CI bounds for each of wealth groups 3, 4 and 5.
    import delim using $inputs/sw_returns_collapse.csv, clear

    /*
    Commented-out code kept from the original; it presumably documents how
    w_group was defined upstream -- confirm.

    g w_group = 0

    replace w_group = 1 if tgetax>=3500000 & tgetax<5000000

    replace w_group = 2 if tgetax>=5000000 & tgetax<10000000

    replace w_group = 3 if tgetax>=10000000 & tgetax<20000000

    replace w_group = 4 if tgetax>=20000000 & tgetax < 50000000

    replace w_group = 5 if tgetax>=50000000
    */

    foreach v of varlist *_intrate* {
        replace `v' = 100 * `v'

        * Remove outliers and repeal year
        replace `v' = . if w_group == 3 & year == 2001
        replace `v' = . if w_group == 4 & year == 2001
        replace `v' = . if w_group == 5 & inlist(year, 2000, 2002, 2008)
        replace `v' = . if year == 2010
    }

    forv grp = 3/5 {

        * The y axis uses about six automatically chosen ticks for every group
        #delimit ;
        twoway 
            (connect mean_intrate_mw year, ms(o) mc("$u1") lc("$u1"))
            (connect ci95h_intrate_mw year, ms(none) mc("$u1") lc("$u1") lp("-"))
            (connect ci95l_intrate_mw year, ms(none) mc("$u1") lc("$u1") lp("-"))
            if w_group == `grp'
            ,
            $gpr
            ytitle("Interest Rate (%)") xtitle("")
            xlab(1996(2)2016) xsca(range(1995 2017))
            ylab(#6)
            legend(order(1 "Preferred Definition, Mortality-Adjusted" 2 "95% CI") 
                region(lcolor(white) margin(tiny)) row(1));
        #delimit cr
        graph export "r_estate_`grp'_ci95.pdf", replace
    }

end/*%>*/

capture program drop scf_stderrors_int_build/*%<*/
program define scf_stderrors_int_build

    /*******************************************************************************
        For reference, calculate aggregate and top 1% interest rates under BHH18
            and our preferred definition without any standard errors. 
    *******************************************************************************/

        /***************************************************************************
            Prepare to collapse data: create BHH 18 stock concept, obtain 
                rank variables and designate top 1%/bottom 99%
        ***************************************************************************/

    use $inputs/scf_revision.dta, clear

    gen othma_fixed_bhh = othma - (equity - deq - reteq) // Following BHKS 2016

    /* Footnote 21 in BHH: ``The sum of interest-bearing assets in the SCF is the sum 
        of liquid deposit accounts, CDs, bonds (non-munis), government-bond mutual funds, 
        general bond mutual funds, 1/2 of combo mutual funds, savings bonds, and the 
        portion of trusts and managed investment accounts that are invested in 
        interest-bearing assets'' */
    gen inttaxw_bhh18 = liq + cds + (bond - notxbnd) + gbmutf + obmutf + ///
                        (0.5 * comutf) + savbnd + othma_fixed_bhh

    cumul networth [aw = wgt], by(year) gen(wlthrank) equal

    tu_rank_scf, rankvar(networth_pref) outname(wlthrank_tu)

    gen bulletingrp = cond(wlthrank < 0.99, 0, cond(wlthrank < 0.999, 1, ///
                        cond(wlthrank < 0.9999, 2, 3)))
    gen preftugrp = cond(wlthrank_tu < 0.99, 0, cond(wlthrank_tu < 0.999, 1, ///
                        cond(wlthrank_tu < 0.9999, 2, 3)))

    tempfile orig
    save `orig'

        /***************************************************************************
            Collapse to yield aggregate stocks and flows by top 1% and bottom
                99%; ensure aggregates are the same regardless of how we do the 
                ranking
        ***************************************************************************/

    foreach rankspec in bulletin preftu {

        use `orig', clear

        collapse (sum) intinc inttaxw_bhh18 inttaxw [fw = wgt1B], by(year `rankspec'grp)

        foreach billionX of varlist intinc inttaxw_bhh18 inttaxw { // Unscale from weighting
            replace `billionX' = `billionX' / 1E9
        }

        reshape wide intinc inttaxw_bhh18 inttaxw, i(year) j(`rankspec'grp)

        rename (*0 *1 *2 *3) (*_bot99`rankspec' *_p99_99pt9`rankspec' *_top01pt9`rankspec' *_top001`rankspec')

        foreach collapsevar in intinc inttaxw_bhh18 inttaxw {
            gen `collapsevar'_top1`rankspec' = `collapsevar'_p99_99pt9`rankspec' + ///
                                               `collapsevar'_top01pt9`rankspec' + ///
                                               `collapsevar'_top001`rankspec'
            gen `collapsevar'_top01`rankspec' = `collapsevar'_top01pt9`rankspec' + ///
                                               `collapsevar'_top001`rankspec'
            gen `collapsevar'_`rankspec'grps = `collapsevar'_bot99`rankspec' + ///
                                               `collapsevar'_p99_99pt9`rankspec' + ///
                                               `collapsevar'_top01pt9`rankspec' + ///
                                               `collapsevar'_top001`rankspec'
        }

        drop *_p99_99pt9`rankspec' *_top01pt9`rankspec'

        tempfile `rankspec'
        save ``rankspec''
    }

    use `bulletin', clear
    merge 1:1 year using `preftu', assert(3) nogen

    foreach aggregate in intinc inttaxw_bhh18 inttaxw {
        assert `aggregate'_bulletingrps == `aggregate'_preftugrps
    }

    drop *_preftugrps
    rename *_bulletingrps *

        /***************************************************************************
            Calculate interest rates
        ***************************************************************************/

    gen r_agg_pref = (intinc / inttaxw) * 100
    gen r_agg_bhh18 = (intinc / inttaxw_bhh18) * 100

    foreach grp in top1 top01 top001 {
        gen r_`grp'_pref = (intinc_`grp'bulletin / inttaxw_`grp'bulletin) * 100
        gen r_`grp'_bhh18 = (intinc_`grp'bulletin / inttaxw_bhh18_`grp'bulletin) * 100
    }

        /***************************************************************************
            Clean up and save as tempfile
        ***************************************************************************/

    keep year r_*

    save $dumpdir/scfinterestrates20210702.dta, replace

    /*******************************************************************************
        Use programs to calculate interest rates with standard errors
    *******************************************************************************/

        /***************************************************************************
            Cycling through years, make calculations and save in separate	
                files
        ***************************************************************************/

    forv year = 1989(3)2019 { 

        load_scf_with_replicate_weights, year(`year') changecaseid("YY1") ///
            changeimplicateid("Y1") prepforscfcombo("implicatenum")
        
        qui gen othma_fixed_bhh = othma - (equity - deq - reteq) // Following BHKS 2016
        drop othma equity deq reteq
        
        /* Footnote 21 in BHH: ``The sum of interest-bearing assets in the SCF is the sum 
            of liquid deposit accounts, CDs, bonds (non-munis), government-bond mutual 
            funds, general bond mutual funds, 1/2 of combo mutual funds, savings bonds, 
            and the portion of trusts and managed investment accounts that are invested
            in interest-bearing assets'' */
        qui gen inttaxw_bhh18 = liq + cds + (bond - notxbnd) + gbmutf + obmutf + ///
                            (0.5 * comutf) + savbnd + othma_fixed_bhh
        drop liq cds bond notxbnd gbmutf obmutf comutf savbnd othma_fixed_bhh
        
        qui gen year = `year'
        
        qui merge 1:1 year Y1 YY1 using $dtadir/scf_revision.dta, assert(2 3) keep(3) ///
            keepusing(inttaxw) nogen
        
        tempfile orig`year'
        qui save `orig`year''
        
        foreach topgrp of numlist 100 1 0.1 0.01 {

            local outname = cond(`topgrp' == 100, "agg", cond(`topgrp' == 1, "top1", ///
                                cond(`topgrp' == 0.1, "top01", "top001")))
            
            use `orig`year''
            
            scf_returns_ses, stock(inttaxw_bhh18) flow(x5708) year(`year') implicateid(Y1) ///
                topgrp(`topgrp') distribution("networth") scaleby(100) reps(500)
            
            store_estimates_ses_cis, outname("r_`outname'_bhh18") pointestimate(`r(pointestimate)') ///
                stderror(`r(stderror)') 
            
            gen year = `year'
            
            qui save "$dumpdir/scfinterestrate_`outname'_bhh18defn`year'.dta", replace
            
            use `orig`year'', clear
            
            scf_returns_ses, stock(inttaxw) flow(x5708) year(`year') implicateid(Y1) ///
                topgrp(`topgrp') distribution("networth") scaleby(100) reps(500)
            
            store_estimates_ses_cis, outname("r_`outname'_pref") pointestimate(`r(pointestimate)') ///
                stderror(`r(stderror)') 
            
            gen year = `year'
            
            qui save "$dumpdir/scfinterestrate_`outname'_prefdefn`year'.dta", replace
        }
    }

        /***************************************************************************
            Append and merge files
        ***************************************************************************/

    clear

    forv year = 1989(3)2019 {
        append using "$dumpdir/scfinterestrate_agg_bhh18defn`year'.dta"

        local mergecode = cond(`year' == 1989, 3, 4)

        merge 1:1 year using "$dumpdir/scfinterestrate_top1_bhh18defn`year'.dta", assert(1 `mergecode') update nogen
        merge 1:1 year using "$dumpdir/scfinterestrate_top01_bhh18defn`year'.dta", assert(1 `mergecode') update nogen
        merge 1:1 year using "$dumpdir/scfinterestrate_top001_bhh18defn`year'.dta", assert(1 `mergecode') update nogen
        merge 1:1 year using "$dumpdir/scfinterestrate_agg_prefdefn`year'.dta", assert(1 `mergecode') update nogen
        merge 1:1 year using "$dumpdir/scfinterestrate_top1_prefdefn`year'.dta", assert(1 `mergecode') update nogen
        merge 1:1 year using "$dumpdir/scfinterestrate_top01_prefdefn`year'.dta", assert(1 `mergecode') update nogen
        merge 1:1 year using "$dumpdir/scfinterestrate_top001_prefdefn`year'.dta", assert(1 `mergecode') update nogen
    }

    order year

    save $dumpdir/scfinterestrates_stderrors20210702.dta, replace

    /*******************************************************************************
        Merge on rates from section (2) and ensure point estimates aren't crazy
    *******************************************************************************/

    use $dumpdir/scfinterestrates20210702.dta, clear

    rename r_* r_*_check

    merge 1:1 year using $dumpdir/scfinterestrates_stderrors20210702.dta, assert(3) nogen

    foreach checkvar of varlist r_agg_pref r_agg_bhh18 r_top*_pref r_top*_bhh18 {
        di "`checkvar'"
        qui gen checkdiff = abs(`checkvar' - `checkvar'_check)
        assert checkdiff < 0.03 if year > 1995
        cap assert checkdiff < 0.18
        drop checkdiff
    }
    drop *_check

end/*%>*/

capture program drop graph_scf_stderrors_int/*%<*/
program define graph_scf_stderrors_int

    /*******************************************************************************
        Appendix Figure 6

        For each wealth group (top1, top01, top001) this program exports:
            r_scf_<group>_ci95.pdf       interest rates under the BHH18 and the
                                         preferred definition, each with its
                                         95% CI bounds
            r_scf_ratio<group>_ci95.pdf  the same series divided by r_macro
                                         (r_equreturns in interest_rate_series.csv)

        Inputs: interest_rate_series.csv and
                $dumpdir/scfinterestrates_stderrors20210702.dta.
        NOTE(review): scf_stderrors_int_build is presumably the program that
        writes the .dta read below -- confirm.
    *******************************************************************************/

    scf_stderrors_int_build

    /*******************************************************************************
        Plot interest rates with standard errors
    *******************************************************************************/

    import delimited using interest_rate_series.csv, clear

    rename r_equreturns r_macro
    drop if missing(r_macro)
    tempfile macrorate
    save `macrorate'

    * Keep only the years present in both the SCF estimates and the macro series
    use $dumpdir/scfinterestrates_stderrors20210702.dta, clear
    merge 1:1 year using `macrorate', keep(3) nogen

    * Ratios to the macro rate. Standard errors and CI bounds are divided by the
    * same r_macro, i.e. r_macro is treated as a known constant within each year.
    foreach suf in "1" "01" "001" {
        gen r_ratiotop`suf'_pref = r_top`suf'_pref / r_macro
        gen r_ratiotop`suf'_bhh18 = r_top`suf'_bhh18 / r_macro

        gen r_ratiotop`suf'_pref_se = r_top`suf'_pref_se / r_macro
        gen r_ratiotop`suf'_bhh18_se = r_top`suf'_bhh18_se / r_macro

        gen r_ratiotop`suf'_pref_ub = r_top`suf'_pref_ci95_ub / r_macro
        gen r_ratiotop`suf'_pref_lb = r_top`suf'_pref_ci95_lb / r_macro

        gen r_ratiotop`suf'_bhh18_ub = r_top`suf'_bhh18_ci95_ub / r_macro
        gen r_ratiotop`suf'_bhh18_lb = r_top`suf'_bhh18_ci95_lb / r_macro
    }

    foreach rate in "top1" "top01" "top001" {

        * Group-specific y-axis ticks: `ylab' for the level figure and
        * `ylab_ratio' for the ratio figure. Groups are matched by exact name.
        if "`rate'" == "top1" {
            local ylab = "0(2)10"
            local ylab_ratio = "0(1)4"
        }
        else if "`rate'" == "top01" {
            local ylab = "0(2)12"
            local ylab_ratio = "0(1)5"
        }
        else {
            local ylab = "0(3)21"
            local ylab_ratio = "0(3)15"
        }

        * Level figure: plots 1-3 are BHH (estimate, lower, upper) and plots 4-6
        * are the preferred definition, which is what legend(order()) refers to.
        #delimit ;
        twoway 
            (connect r_`rate'_bhh18 year, ms(s) mc("$u3") lc("$u3"))
            (connect r_`rate'_bhh18_ci95_lb year, ms(none) mc("$u3") lc("$u3") lp("_"))
            (connect r_`rate'_bhh18_ci95_ub year, ms(none) mc("$u3") lc("$u3") lp("_"))
            (connect r_`rate'_pref year, ms(o) mc("$u1") lc("$u1"))
            (connect r_`rate'_pref_ci95_lb year, ms(none) mc("$u1") lc("$u1") lp("-"))
            (connect r_`rate'_pref_ci95_ub year, ms(none) mc("$u1") lc("$u1") lp("-"))
            ,
            $gpr
            ytitle("Interest Rate (%)") xtitle("")
            xlab(1989(3)2016) xsca(range(1988 2017))
            ylab(`ylab')
            legend(order(4 "Preferred Definition" 5 "95% CI"
                         1 "BHH Definition" 2 "95% CI" ) 
                region(lcolor(white) margin(tiny)) row(2));
        #delimit cr
        graph export "r_scf_`rate'_ci95.pdf", replace

        * Ratio figure: same layout, series divided by r_macro
        #delimit ;
        twoway 
            (connect r_ratio`rate'_bhh18 year, ms(s) mc("$u3") lc("$u3"))
            (connect r_ratio`rate'_bhh18_lb year, ms(none) mc("$u3") lc("$u3") lp("_"))
            (connect r_ratio`rate'_bhh18_ub year, ms(none) mc("$u3") lc("$u3") lp("_"))
            (connect r_ratio`rate'_pref year, ms(o) mc("$u1") lc("$u1"))
            (connect r_ratio`rate'_pref_lb year, ms(none) mc("$u1") lc("$u1") lp("-"))
            (connect r_ratio`rate'_pref_ub year, ms(none) mc("$u1") lc("$u1") lp("-"))
            ,
            $gpr
            ytitle("Ratio of Rate to Equal Returns Rate") xtitle("")
            xlab(1989(3)2016) xsca(range(1988 2017))
            ylab(`ylab_ratio')
            legend(order(4 "Preferred Definition" 5 "95% CI"
                         1 "BHH Definition" 2 "95% CI" ) 
                region(lcolor(white) margin(tiny)) row(2));
        #delimit cr
        graph export "r_scf_ratio`rate'_ci95.pdf", replace
    }

end/*%>*/

capture program drop scf_rates/*%<*/
program define scf_rates

    /*******************************************************************************
        Compute the ratio of the SCF top-group interest rates (top 1%, 0.1%,
        0.01%; BHH18 and preferred definitions) to the macro rate r_macro
        (r_equreturns in interest_rate_series.csv).

        The dataset in memory on entry is saved to a tempfile and reloaded
        before the program exits.

        NOTE(review): the r_ratio* variables are created and then discarded
        when `orig' is reloaded; nothing is saved or returned. Confirm whether
        the result was meant to be written out.
    *******************************************************************************/

    * scf_r was previously used without being declared, so `scf_r' expanded to
    * nothing and the save/use below had no file name.
    tempfile orig scf_r macrorate
    save `orig'

    use $dumpdir/scfinterestrates_stderrors20210702.dta, clear
    keep year r_top*1_bhh18 r_top*1_pref
    save `scf_r'

    import delimited using interest_rate_series.csv, clear

    rename r_equreturns r_macro
    drop if missing(r_macro)
    save `macrorate'

    * Keep only years present in both the SCF estimates and the macro series
    use `scf_r', clear
    merge 1:1 year using `macrorate', keep(3) nogen

    gen r_ratio1_pref = r_top1_pref / r_macro
    gen r_ratio1_bhh18 = r_top1_bhh18 / r_macro
    gen r_ratio01_pref = r_top01_pref / r_macro
    gen r_ratio01_bhh18 = r_top01_bhh18 / r_macro
    gen r_ratio001_pref = r_top001_pref / r_macro
    gen r_ratio001_bhh18 = r_top001_bhh18 / r_macro

    * Put the caller's data back in memory
    use `orig', clear

end/*%>*/

capture program drop fixinc_aggs/*%<*/
program define fixinc_aggs

    /*******************************************************************************
        Appendix Figure 2

        Collapses the interest_info data to annual totals, merges on aggregate
        non-qualified dividends (szz data, group "All"), and exports:
            interest_aggregates_year.pdf    annual totals by source, scaled by 1e-9
            interest_incomecompoR_year.pdf  each source as a share of total
                                            information-return interest
            interest_incomecompo_year.pdf   each source as a share of txintamt
            interest_info_aggshare.pdf      total information-return interest
                                            as a share of txintamt
    *******************************************************************************/

    * Information-return interest sources (suffixes of the interest_* variables)
    local intsrcs "1099bank 1099sav 1099loan 1065 1120s 1041"

    tempfile nonqualdivs
    load_analysis_data szz
    keep if group == "All"
    keep year nonqualdivs
    save `nonqualdivs'

    load_analysis_data interest_info

    drop *_sec *_kid*
    collapse (sum) interest_* txintamt qualtxblediv txdivamt adjgross, by(year)
    merge 1:1 year using `nonqualdivs', keep(1 3) nogen

    * rowtotal() is the current name of egen's rsum(); missing values count as 0
    egen interest_info = rowtotal(interest_*)
    gen interest_info_share = 100 * interest_info / txintamt

    * Shares of aggregate taxable interest (dividends: share of txdivamt)
    foreach intsrc of local intsrcs {
        gen incomecompo_`intsrc' = 100 * (interest_`intsrc' / txintamt)
    }
    gen incomecompo_nonqualdivs = 100 * (nonqualdivs / txdivamt)

    * Shares of total information-return interest (suffix R)
    foreach intsrc of local intsrcs {
        gen incomecompo_`intsrc'R = 100 * (interest_`intsrc' / interest_info)
    }

    * Rescale levels by 1e-9 for plotting; done after the shares are computed
    foreach intsrc of local intsrcs {
        replace interest_`intsrc' = 1e-9 * interest_`intsrc' 
    }
    replace nonqualdivs = 1e-9 * nonqualdivs
    replace txintamt = 1e-9 * txintamt

    #delimit ;
    twoway 
        (connect interest_1099bank year, msize(small) lw(thin) mc("$u1") lc("$u1") ms(o)) 
        (connect interest_1099loan year, msize(small) lw(thin) mc("$p2") lc("$p2") ms(d))
        (connect interest_1099sav year, msize(small) lw(thin) mc("$f1") lc("$f1") ms(oh))
        (connect interest_1065 year, msize(small) lw(thin) mc("$u3") lc("$u3") ms(s)) 
        (connect interest_1120s year, msize(small) lw(thin) mc("$f3") lc("$f3") ms(t)) 
        (connect interest_1041 year, msize(small) lw(thin) mc("$u4") lc("$u4") ms(+) lp("-"))
        (connect nonqualdivs year, msize(small) lw(thin) mc("$f3") lc("$f3") ms(sh))
        (connect txintamt year, msize(small) lw(thin) mc("$u1") lc("$u1") ms(dh))
        ,
        xtitle("") ytitle("Aggregate (Nominal USD, Billions)")
        ylab(0(30)240)
        xsca(range(2000.5 2016.5))
        xlab(2001(3)2016)
        legend(order(1 "1099-INT Bank" 4 "1065-K1" 5 "1120S-K1" 
                     3 "Savings Bond" 2 "1099-INT Loan" 6 "1041-K1"
                     7 "Non-Qual Divs" 8 "Taxable Interest") col(3)
                     symxsize(*.8)
               region(lc(white))) $gpr xsize(5.5);
    graph export "interest_aggregates_year.pdf", replace;
    twoway 
        (connect incomecompo_1099bankR year, msize(small) lw(thin) mc("$u1") lc("$u1") ms(o)) 
        (connect incomecompo_1099loanR year, msize(small) lw(thin) mc("$p2") lc("$p2") ms(d))
        (connect incomecompo_1099savR year, msize(small) lw(thin) mc("$f1") lc("$f1") ms(oh))
        (connect incomecompo_1065R year, msize(small) lw(thin) mc("$u3") lc("$u3") ms(s)) 
        (connect incomecompo_1120sR year, msize(small) lw(thin) mc("$f3") lc("$f3") ms(t)) 
        (connect incomecompo_1041R year, msize(small) lw(thin) mc("$u4") lc("$u4") ms(+) lp("-"))
        ,
        xtitle("") ytitle("Share of Aggregate Info Return Interest (%)")
        yscale(range(0 100)) ylab(0(20)100)
        legend(order(1 "1099-INT Bank" 4 "1065-K1" 5 "1120S-K1" 
                     3 "Savings Bond" 2 "1099-INT Loan" 6 "1041-K1") col(3)
               region(lc(white))) $gpr xsize(5.5);
    graph export "interest_incomecompoR_year.pdf", replace;
    twoway 
        (connect incomecompo_1099bank year, msize(small) lw(thin) mc("$u1") lc("$u1") ms(o)) 
        (connect incomecompo_1099loan year, msize(small) lw(thin) mc("$p2") lc("$p2") ms(d))
        (connect incomecompo_1099sav year, msize(small) lw(thin) mc("$f1") lc("$f1") ms(oh))
        (connect incomecompo_1065 year, msize(small) lw(thin) mc("$u3") lc("$u3") ms(s)) 
        (connect incomecompo_1120s year, msize(small) lw(thin) mc("$f3") lc("$f3") ms(t)) 
        (connect incomecompo_1041 year, msize(small) lw(thin) mc("$u4") lc("$u4") ms(+) lp("-"))
        ,
        xtitle("") ytitle("Share of Aggregate Taxable Interest (%)")
        yscale(range(0 100)) ylab(0(20)100)
        legend(order(1 "1099-INT Bank" 4 "1065-K1" 5 "1120S-K1" 
                     3 "Savings Bond" 2 "1099-INT Loan" 6 "1041-K1") col(3)
               region(lc(white))) $gpr xsize(5.5);
    graph export "interest_incomecompo_year.pdf", replace;
    twoway (connect interest_info_share year, color("$u1")), 
        xtitle("") ytitle("Info Return Interest / Aggregate Taxable Interest (%)")
        yscale(range(0 100)) ylab(0(20)100)
        xsca(range(2000.5 2016.5))
        xlab(2001(3)2016)
        xsize(5.5) $gpr;
    graph export "interest_info_aggshare.pdf", replace;
    #delimit cr

end/*%>*/

capture program drop pship_int_facts/*%<*/
program define pship_int_facts

    /***************************************************************************
        Appendix Figure 22b

        Stacks boutique, bank, deposit and FRED bond-yield series into one
        dataset keyed by whichsource, then exports the interest rates
        (asset_interest_rates_overtime.pdf) and the implied capitalization
        factors 100/r (capfactor_assets_overtime.pdf) for 2001-2016.
    ***************************************************************************/

    /***************************************************************************
        Stage each comparison series in its own tempfile
    ***************************************************************************/

    * FRED bond yields, 1966-2019
    load_analysis_data fred_rates
    keep year r_aaa r_baa r_ust10 r_bbb
    keep if inrange(year, 1966, 2019)
    tempfile bondyields
    gen whichsource = "fred"
    save `bondyields'

    * Deposit rates, averaged within year
    load_analysis_data deposit_rates
    tempfile deposits
    collapse (mean) r=r_deposit, by(year)
    gen whichsource = "deposits"
    save `deposits'

    * Bank rates, used as loaded
    load_analysis_data bank_rates
    tempfile bank
    save `bank'

    /***************************************************************************
        Boutique rates are the base dataset; the other sources are appended
        onto it
    ***************************************************************************/

    tempfile boutique
    load_analysis_data boutique_rates
    save `boutique'

    append using `bank'
    append using `deposits'
    append using `bondyields'

    /***************************************************************************
        Capitalization factor = 100 / rate (rates are in percent)
    ***************************************************************************/

    gen capfactor = 100/r
    foreach rating in aaa baa {
        gen capfactor_`rating' = 100/r_`rating'
    }

    sort year whichsource grp

    /***************************************************************************
        Graded shades of the $f3 and $u1 colours, taken from a 20-step
        intensity ramp
    ***************************************************************************/

    colorpalette "$f3", intensity(0.05(.05)1)
    local f31 "`r(p8)'"
    local f32 "`r(p12)'"
    local f33 "`r(p16)'"
    local f34 "`r(p20)'"

    colorpalette "$u1", intensity(0.05(.05)1)
    local u11 "`r(p8)'"
    local u12 "`r(p11)'"
    local u13 "`r(p14)'"
    local u14 "`r(p17)'"
    local u15 "`r(p20)'"

    /***************************************************************************
        Figure: interest rates over time. Plot order (1-8) is what
        legend(order()) refers to.
    ***************************************************************************/

    twoway ///
        (connect r year if whichsource == "boutique" & grp == "99.99", ///
            color("`u15'") msym(o) lw(thin)) ///
        (connect r year if whichsource == "boutique" & grp == "99.9", ///
            color("`u14'") msym(s) lw(thin)) ///
        (connect r year if whichsource == "boutique" & grp == "99", ///
            color("`u13'") msym(t) lw(thin)) ///
        (connect r year if whichsource == "boutique" & grp == "90", ///
            color("`u12'") msym(x) lw(thin)) ///
        (connect r_aaa year if whichsource == "fred", ///
            color("$u5") lp("-") ms(s)) ///
        (connect r_baa year if whichsource == "fred", ///
            color("$u5") lp("-") ms(o) lw(medthin)) ///
        (connect r year if whichsource == "bank" & grp == "99.9", ///
            color("`f34'") msym(o) lw(thin)) ///
        (connect r year if whichsource == "nonqual", ///
            color("$f1") msym(s) lp("_") lw(thin)) ///
        if year > 2000 & year <= 2016, ///
        ytitle("Interest Rate (%)") ///
        ylab(, format(%9.0f)) ///
        xlab(2001(2)2015) ///
        xtitle("") ///
        legend(order( ///
                     1 "Boutique, Top 0.01 AGI" ///
                     4 "Boutique, P90-99 AGI" ///
                     6 "Moody's Baa" ///
                     2 "Boutique, P99.9-99.99 AGI" ///
                     8 "Non-Qual Divs" ///
                     5 "Moody's Aaa" ///
                     3 "Boutique, P99-99.9 AGI" ///
                     7 "Bank, Top 0.1 Non-Int Wealth" ///
                     ) ///
           size(small) col(3) symxsize(*0.6) region(lc(white) margin(tiny))) ///
        $gpr ///
        xsize(6.5)
    graph export "asset_interest_rates_overtime.pdf", replace

    /***************************************************************************
        Figure: capitalization factors over time (same series and styling)
    ***************************************************************************/

    twoway ///
        (connect capfactor year if whichsource == "boutique" & grp == "99.99", ///
            color("`u15'") msym(o) lw(thin)) ///
        (connect capfactor year if whichsource == "boutique" & grp == "99.9", ///
            color("`u14'") msym(s) lw(thin)) ///
        (connect capfactor year if whichsource == "boutique" & grp == "99", ///
            color("`u13'") msym(t) lw(thin)) ///
        (connect capfactor year if whichsource == "boutique" & grp == "90", ///
            color("`u12'") msym(x) lw(thin)) ///
        (connect capfactor_aaa year if whichsource == "fred", ///
            color("$u5") lp("-") ms(s)) ///
        (connect capfactor_baa year if whichsource == "fred", ///
            color("$u5") lp("-") ms(o) lw(medthin)) ///
        (connect capfactor year if whichsource == "bank" & grp == "99.9", ///
            color("`f34'") msym(o) lw(thin)) ///
        (connect capfactor year if whichsource == "nonqual", ///
            color("$f1") msym(s) lp("_") lw(thin)) ///
        if year > 2000 & year <= 2016, ///
        ytitle("Capitalization Factor") ///
        ylab(, format(%9.0f)) ///
        xlab(2001(2)2015) ///
        ylab(0(20)100) ///
        xtitle("") ///
        legend(order( ///
                     1 "Boutique, Top 0.01 AGI" ///
                     4 "Boutique, P90-99 AGI" ///
                     6 "Moody's Baa" ///
                     2 "Boutique, P99.9-99.99 AGI" ///
                     8 "Non-Qual Divs" ///
                     5 "Moody's Aaa" ///
                     3 "Boutique, P99-99.9 AGI" ///
                     7 "Bank, Top 0.1 Non-Int Wealth" ///
                     ) ///
           size(small) col(3) symxsize(*0.6) region(lc(white) margin(tiny))) ///
        $gpr ///
        xsize(6.5)
    graph export "capfactor_assets_overtime.pdf", replace

end/*%>*/

*******************************************************************************

/*******************************************************************************
    Tables/*%<*/
*******************************************************************************/
capture program drop cmd_parameters_table/*%<*/
program define cmd_parameters_table
/*******************************************************************************
    (P) Parameters in classical minimum distance exercise

    Reads the four Pi estimates and the four interest-rate coefficients (each
    with a standard error) from the CMD output folder, checks that the
    coefficients can be reproduced from the Pis, and writes
    cmd_niw_parameters.csv and cmd_niw_parameters_table.tex.
*******************************************************************************/

    /***************************************************************************
        Step 1: Pis for non-interest wealth (rows 1-4), labelled and saved
            as a tempfile
    ***************************************************************************/

    import delim $cmdout/top01-niw-ust5-baa-btstrp200/parameters4.txt, clear

    rename (v1 v2) (theta se_theta)
    assert _N == 4

    qui gen rowformerge = _n

    * "alpaca" is a sentinel: any row left unlabelled trips the assert below
    qui gen paramlab = cond(_n == 1, "$ \pi_1^I$", ///
                       cond(_n == 2, "$ \pi_2^I$", ///
                       cond(_n == 3, "$ \pi_1^C$", ///
                       cond(_n == 4, "$ \pi_2^C$",  "alpaca"))))
    assert paramlab != "alpaca"

    tempfile pis
    save `pis' 

    /***************************************************************************
        Step 2: coefficients on interest rates and their standard errors
            (rows 5-8), labelled the same way
    ***************************************************************************/

    import delim $cmdout/top01-niw-ust5-baa-btstrp200/coef_ses4.txt, clear

    rename (v1 v2) (theta se_theta) 
    assert _N == 4 

    gen rowformerge = _n + 4
    gen paramlab = cond(rowformerge == 5, "Top 0.1\% coefficient on ln $ r_t^I$", ///
                   cond(rowformerge == 6, "Top 0.1\% coefficient on ln $ r_t^C$", ///
                   cond(rowformerge == 7, "Bottom 99.9\% coefficient on ln $ r_t^I$", ///
                   cond(rowformerge == 8, "Bottom 99.9\% coefficient on ln $ r_t^C$", "alpaca"))))
    assert paramlab != "alpaca"

    /***************************************************************************
        Step 3: stack the Pis on top and verify that the coefficients equal
            the entries of the inverse of the 2x2 matrix of Pis
    ***************************************************************************/

    append using `pis'
    sort rowformerge

    local piI1 = theta[1]
    local piI2 = theta[2]
    local piC1 = theta[3]
    local piC2 = theta[4]

    * Determinant kept as macro text so each expression below expands to the
    * same formula as if it were written out in full
    local det "((`piI1' * `piC2') - (`piI2' * `piC1'))"

    qui gen theta_check = `piC2' / `det' in 5
    qui replace theta_check = - `piI2' / `det' in 6
    qui replace theta_check = - `piC1' / `det' in 7
    qui replace theta_check = `piI1' / `det' in 8

    assert abs(theta - theta_check) < 1E-4 if !missing(theta_check)
    drop theta_check

    /***************************************************************************
        Step 4: raw export, then format estimates for the LaTeX table
    ***************************************************************************/

    outsheet theta se_theta paramlab using cmd_niw_parameters.csv, replace

    format theta se_theta %5.2f
    tostring se_theta, replace force usedisplayformat

    * Standard errors are shown in parentheses
    replace se_theta = "(" + se_theta + ")"

    * Table scaffolding is held in row 1 of helper string variables
    qui gen begtab = "\begin{tabular}{lcc}" in 1
    qui gen toprule = "\toprule" in 1
    qui gen hline = "\hline" in 1
    qui gen botrule = "\bottomrule" in 1
    qui gen endtab = "\end{tabular}" in 1
    qui gen c = "" in 1

    qui gen tabtitle = "& \textit{Estimate} & \textit{Std. error}" in 1

    /***************************************************************************
        Step 5: write the table to .tex, one piece at a time
    ***************************************************************************/

    local texfile "cmd_niw_parameters_table.tex"

    * Header
    qui listtex begtab if _n == 1 using "`texfile'", replace rstyle(none)
    qui listtex toprule if _n == 1, appendto("`texfile'") rstyle(none)
    qui listtex tabtitle if _n == 1, appendto("`texfile'") rstyle(tabular)
    qui listtex hline if _n == 1, appendto("`texfile'") rstyle(none)

    * Panel 1: Pis
    qui listtex paramlab theta se_theta if inrange(rowformerge, 1, 4), ///
        appendto("`texfile'") rstyle(tabular)

    * Blank row, rule, blank row between panels
    qui listtex c c c if _n == 1, appendto("`texfile'") rstyle(tabular)
    qui listtex hline if _n == 1, appendto("`texfile'") rstyle(none)
    qui listtex c c c if _n == 1, appendto("`texfile'") rstyle(tabular)

    * Panel 2: coefficients on interest rates
    qui listtex paramlab theta se_theta if inrange(rowformerge, 5, 8), ///
        appendto("`texfile'") rstyle(tabular)

    * Footer
    qui listtex botrule if _n == 1, appendto("`texfile'") rstyle(none)
    qui listtex endtab if _n == 1, appendto("`texfile'") rstyle(none)
end/*%>*/

capture program drop calibrated_moments_table/*%<*/
program define calibrated_moments_table
/*******************************************************************************
    (C) Calibrated moments in the SCF for CMD exercise

    Computes means, variances and covariances of the log rate and log asset
    series (plus sa1) from the CMD input file, checks them against the moments
    written by the CMD Matlab code, and writes calibrated_niw_moments.tex.
*******************************************************************************/

    /***************************************************************************
        Pull in CMD inputs, restricted to every third year from 1989 to 2019
    ***************************************************************************/

    import delimited using $inputs/cmdinputs_top01bot99pt9_20210615.csv, clear
    keep if inrange(year, 1989, 2019) & mod(year - 1989, 3) == 0

    keep ln_r1_scf_niwrank ln_r2_scf_niwrank ln_a1_scf_niwrank ///
        ln_a2_scf_niwrank mean_sa1_scf_niwrank

    * Shorten names (the old names are given as abbreviations)
    rename (ln_r1 ln_r2 ln_a1 ln_a2 mean_sa1) (ln_r1 ln_r2 ln_a1 ln_a2 sa1)

    /***************************************************************************
        Moments are stored in observation 1 only
    ***************************************************************************/

    * Means
    foreach muvar in r1 r2 a1 a2 {
        qui summ ln_`muvar', meanonly   
        gen mu_`muvar' = `r(mean)' in 1
    }

    * Variances and the within-type covariance (r1 with r2, a1 with a2)
    foreach pair in "r1 r2" "a1 a2" {
        local first : word 1 of `pair'
        local second : word 2 of `pair'

        corr ln_`first' ln_`second', covariance
        gen var_`first' = `r(Var_1)' in 1
        gen var_`second' = `r(Var_2)' in 1
        gen cov_`first'`second' = `r(cov_12)' in 1
    }

    * Cross covariances between each rate and each asset series. The variances
    * reported by each corr call must agree with those stored above.
    foreach rr in r1 r2 {
        foreach aa in a1 a2 {
            corr ln_`rr' ln_`aa', covariance
            assert inrange(`r(Var_1)' / var_`rr', 0.9999, 1.0001) in 1
            assert inrange(`r(Var_2)' / var_`aa', 0.9999, 1.0001) in 1
            gen cov_`rr'`aa' = `r(cov_12)' in 1
        }
    }

    * Reshape to one row per moment
    keep mu_r? mu_a? var_* cov_* sa1
    keep in 1

    xpose, clear varname
    rename (v1 _varname) (moment whichmoment)
    order whichmoment

    tempfile computedfromscf
    save `computedfromscf'

    /***************************************************************************
        Cross-reference with moments calculated in the CMD Matlab code
    ***************************************************************************/

    import delim $cmdout/top01-niw-ust5-baa-btstrp200/scfcalibrated4.txt, clear
    assert _N == 15

    rename v1 moment_matlab

    * The file has one moment per row in this fixed order; rownum keeps that
    * order for the table
    gen rownum = _n
    local momentnames "mu_r1 mu_r2 mu_a1 mu_a2 var_r1 var_r2 var_a1 var_a2 cov_r1r2 cov_r1a1 cov_r1a2 cov_r2a1 cov_r2a2 cov_a1a2 sa1"
    gen whichmoment = word("`momentnames'", _n)

    qui ds whichmoment, not
    order `r(varlist)', alphabetic last

    merge 1:1 whichmoment using `computedfromscf', assert(3) nogen

    * Stata and Matlab values must agree to within 0.01%
    assert inrange(moment / moment_matlab, 0.9999, 1.0001)
    drop moment_matlab

    /***************************************************************************
        Prepare to make table: LaTeX symbol for each moment
    ***************************************************************************/

    * sa1 is the default; every other moment is overwritten below
    gen symbol = "s^a_1"
    qui replace symbol = "\mu_{r_1}" if whichmoment == "mu_r1"
    qui replace symbol = "\mu_{r_2}" if whichmoment == "mu_r2"
    qui replace symbol = "\mu_{a_1}" if whichmoment == "mu_a1"
    qui replace symbol = "\mu_{a_2}" if whichmoment == "mu_a2"
    qui replace symbol = "\sigma^2_{r_1}" if whichmoment == "var_r1"
    qui replace symbol = "\sigma^2_{r_2}" if whichmoment == "var_r2"
    qui replace symbol = "\sigma^2_{a_1}" if whichmoment == "var_a1"
    qui replace symbol = "\sigma^2_{a_2}" if whichmoment == "var_a2"
    qui replace symbol = "c_{r_1,r_2}" if whichmoment == "cov_r1r2"
    qui replace symbol = "c_{r_1,a_1}" if whichmoment == "cov_r1a1"
    qui replace symbol = "c_{r_1,a_2}" if whichmoment == "cov_r1a2"
    qui replace symbol = "c_{r_2,a_1}" if whichmoment == "cov_r2a1"
    qui replace symbol = "c_{r_2,a_2}" if whichmoment == "cov_r2a2"
    qui replace symbol = "c_{a_1,a_2}" if whichmoment == "cov_a1a2"
    replace symbol = "$" + symbol + "$"

    sort rownum

    format moment %5.3f

    * Table scaffolding is held in row 1 of helper string variables
    qui gen begtab = "\begin{tabular}{lr}" in 1
    qui gen toprule = "\toprule" in 1
    qui gen hline = "\hline" in 1
    qui gen botrule = "\bottomrule" in 1
    qui gen endtab = "\end{tabular}" in 1

    qui gen tabtitle = "Moment & Value" in 1

    /***************************************************************************
        Output .tex for table
    ***************************************************************************/

    local texfile "calibrated_niw_moments.tex"

    * Header
    qui listtex begtab if _n == 1 using "`texfile'", replace rstyle(none)
    qui listtex toprule if _n == 1, appendto("`texfile'") rstyle(none)
    qui listtex tabtitle if _n == 1, appendto("`texfile'") rstyle(tabular)
    qui listtex hline if _n == 1, appendto("`texfile'") rstyle(none)

    * Body: one row per moment
    qui listtex symbol moment, appendto("`texfile'") rstyle(tabular)

    * Footer
    qui listtex botrule if _n == 1, appendto("`texfile'") rstyle(none)
    qui listtex endtab if _n == 1, appendto("`texfile'") rstyle(none)

end/*%>*/

capture program drop scfbizsummstats /*%<*/
program define scfbizsummstats
/*******************************************************************************
    Appendix Table 3
    (S) Summary statistics for businesses in the Survey of Consumer Finances

    Reads:  $dtadir/scf2016bizdetail.dta
    Writes: scfbiztable.csv (collapsed statistics plus the LaTeX helper
                columns; this file is re-imported by scfbizmultiples)
            scf2016bizdetail_summstats.tex

    Steps:  (1) build "newname = oldname" collapse lists in local macros,
            (2) collapse by each wealth-group indicator and keep the rows
                where the indicator equals 1,
            (3) combine the per-group results into one dataset keyed on
                quantity,
            (4) rescale, label, and write the table with listtex.

    The table has two column groups, labelled P99-99.9 wealth and Top 0.1%
    wealth, which are filled from the top1_01wlth_* and top01wlth_* columns.
*******************************************************************************/

    /***************************************************************************
        Collapse to yield statistics that we want for tables
    ***************************************************************************/

        /***********************************************************************
            Make lists of variables we want to get collapses of; store 
                these in local macros for easier collapses
        ***********************************************************************/

    use $dtadir/scf2016bizdetail.dta, clear

    * Active businesses 1 and 2
    ds actbus1emp actbus1sharemktval actbus1totalmktval actbus1sharecostbasis ///
        actbus1totalcostbasis actbus1totalsales actbus1sharesales actbus1totalnetincm ///
        actbus1sharenetincm actbus1mktval_*_cnsr
    local active1vars "`r(varlist)'"

    * The business-2 list is the business-1 list with every "1" swapped for "2"
    local active2vars = subinstr("`active1vars'", "1", "2", .)

    * Active businesses 3+
    ds actbus3plus_sharemktval actbus3plus_sharecostbasis actbus3plus_sharenetincm ///
        actbus3p_mktval_*_cnsr
    local active3vars "`r(varlist)'"

    * Non-actively managed businesses on aggregate
    ds nonactsharesmktval nonactsharescostbasis nonactsharesnetincm nonactmktval_*_cnsr
    local nonactvars "`r(varlist)'"

    /* For each statistic, turn the plain variable list into a collapse target 
        list of the form "<stat>_<var without _cnsr> = <var>" */
    forv num_biz = 1 / 3 {
        foreach moment in "mean" "sd" "p50" "min" "max" "p5" "p95" {
            
            local active`num_biz'_`moment' = "`active`num_biz'vars'"

            foreach activevar of varlist `active`num_biz'vars' {
                local renamestub = subinstr("`activevar'", "_cnsr", "", 1)

                local active`num_biz'_`moment' = subinstr("`active`num_biz'_`moment''", "`activevar'", ///
                                                          "`moment'_`renamestub' = `activevar'", 1)
            }
        }
    }

    foreach moment in "mean" "sd" "p50" "min" "max" "p5" "p95" { 
        
        local nonactbiz_`moment' = "`nonactvars'"

        foreach nonactvar of varlist `nonactvars' {
            local renamestub = subinstr("`nonactvar'", "_cnsr", "", 1)

            local nonactbiz_`moment' = subinstr("`nonactbiz_`moment''", "`nonactvar'", ///
                                                "`moment'_`renamestub' = `nonactvar'", 1)
        }
    }

        /***********************************************************************
            Cycling through ranking groups, collapse to yield desired 
                stats
        ***********************************************************************/

    foreach topgrp in top1wlth top1_01wlth top01wlth {

            /*******************************************************************
                Collapse to yield statistics on number of businesses 
                    owned 
            *******************************************************************/

        use $dtadir/scf2016bizdetail.dta, clear

        #delimit ;
        collapse (mean) sharebus = anyprivbiz share1plusactbus = oneplusactbus
                    share2plusactbus = twoplusactbus share3plusactbus = threeplusactbus
                    sharenonactbus = anynonactbus mean_numbus = numbus 
                    mean_numactbus = numactbus mean_numnonactbus = numnonactbus 
                 (sd) sd_numbus = numbus sd_numactbus = numactbus 
                    sd_numnonactbus = numnonactbus 
                 (median) p50_numbus = numbus p50_numactbus = numactbus 
                    p50_numnonactbus = numnonactbus 
                 (p5) p5_numbus = numbus p5_numactbus = numactbus 
                    p5_numnonactbus = numnonactbus 
                 (p95) p95_numbus = numbus p95_numactbus = numactbus 
                    p95_numnonactbus = numnonactbus 
                 (min) min_numbus = numbus min_numactbus = numactbus 
                    min_numnonactbus = numnonactbus 
                 (max) max_numbus = numbus max_numactbus = numactbus 
                    max_numnonactbus = numnonactbus 
            [aw = wgt], by(`topgrp');
        #delimit cr

        * One row per quantity, one column per statistic
        reshape long mean share sd p5 p50 p95 min max, i(`topgrp') j(quantity, string)

        * Prefix each statistic with the group name so groups can sit side by side
        rename (mean share sd p5 p50 p95 min max) ///
            (`topgrp'_mean `topgrp'_share `topgrp'_sd `topgrp'_p5 `topgrp'_p50 `topgrp'_p95 ///
                `topgrp'_min `topgrp'_max)

        * Drop the first underscore left over from the stat_quantity naming
        replace quantity = subinstr(quantity, "_", "", 1)

        keep if `topgrp' == 1
        drop `topgrp'

        tempfile `topgrp'_basic
        save ``topgrp'_basic'

            /*******************************************************************
                Collapse to yield statistics on actively-managed 
                    businesses, conditional on actively-managed business 
                    ownership
            *******************************************************************/

        forv num_biz = 1 / 3 {
            
            use $dtadir/scf2016bizdetail.dta, clear

            keep if numactbus >= `num_biz'

            collapse (mean) `active`num_biz'_mean' (sd) `active`num_biz'_sd' ///
                (median) `active`num_biz'_p50' (p5) `active`num_biz'_p5' (p95) `active`num_biz'_p95' ///
                (min) `active`num_biz'_min' (max) `active`num_biz'_max' [aw = wgt], by(`topgrp')

            reshape long mean sd p5 p50 p95 min max, i(`topgrp') j(quantity, string)

            rename (mean sd p5 p50 p95 min max) ///
            (`topgrp'_mean `topgrp'_sd `topgrp'_p5 `topgrp'_p50 `topgrp'_p95 ///
                `topgrp'_min `topgrp'_max)

            replace quantity = subinstr(quantity, "_", "", 1)

            keep if `topgrp' == 1
            drop `topgrp'

            tempfile `topgrp'_active`num_biz'
            save ``topgrp'_active`num_biz''
        }

            /*******************************************************************
                Collapse to yield statistics on non-actively managed 
                    businesses, conditional on owning one
            *******************************************************************/

        use $dtadir/scf2016bizdetail.dta, clear

        keep if anynonactbus == 1

        collapse (mean) `nonactbiz_mean' (sd) `nonactbiz_sd' (median) `nonactbiz_p50' ///
            (p5) `nonactbiz_p5' (p95) `nonactbiz_p95' (min) `nonactbiz_min' ///
            (max) `nonactbiz_max' [aw = wgt], by(`topgrp')

        reshape long mean sd p5 p50 p95 min max, i(`topgrp') j(quantity, string)

        rename (mean sd p5 p50 p95 min max) ///
            (`topgrp'_mean `topgrp'_sd `topgrp'_p5 `topgrp'_p50 `topgrp'_p95 ///
                `topgrp'_min `topgrp'_max)

        replace quantity = subinstr(quantity, "_", "", 1)

        keep if `topgrp' == 1
        drop `topgrp'

        tempfile `topgrp'_nonactbiz
        save ``topgrp'_nonactbiz'
    }

    /***************************************************************************
         Merge together the various tempfiles containing desired statistics
    ***************************************************************************/

    * The top1wlth files are stacked to form the master list of quantities
    use `top1wlth_basic', clear
    append using `top1wlth_active1' `top1wlth_active2' `top1wlth_active3' `top1wlth_nonactbiz'

    /* The other two groups are merged on as extra columns. The asserts check 
        that each tempfile matches exactly the rows it is supposed to supply. */
    foreach topgrp in top1_01wlth top01wlth {
        
        merge 1:1 quantity using ``topgrp'_basic', assert(1 3)
        
        assert _merge == 3 if regexm(quantity, "[1-3]plusactbus") | /// 
            inlist(quantity, "bus", "actbus", "nonactbus", "numbus", "numactbus", "numnonactbus")
        
        assert _merge == 1 if !regexm(quantity, "[1-3]plusactbus") & ///
            !inlist(quantity, "bus", "actbus", "nonactbus", "numbus", "numactbus", "numnonactbus")
        
        drop _merge

        * The group columns already exist after the first merge, so later merges use update
        forv num_biz = 1 / 3 {
            merge 1:1 quantity using ``topgrp'_active`num_biz'', update assert(1 4)
            assert _merge == 4 if regexm(quantity, "actbus`num_biz'")
            assert _merge == 1 if !regexm(quantity, "actbus`num_biz'")
            drop _merge
        }

        merge 1:1 quantity using ``topgrp'_nonactbiz', update assert(1 4)
        assert _merge == 4 if regexm(quantity, "nonactshares") | regexm(quantity, "nonactmktval_")
        assert _merge == 1 if !regexm(quantity, "nonactshares") & !regexm(quantity, "nonactmktval_")
        drop _merge
    }

    /***************************************************************************
        Reformat statistics so they're suitable for tables
    ***************************************************************************/

    foreach sharevar of varlist *_share { // Scale up shares into percentages
        replace `sharevar' = `sharevar' * 100 
    }

    /* Scale down net income, sales, market values, and cost basis into thousands, 
        but be careful not to scale down multiples */
    qui ds quantity *_share, not
    foreach column of varlist `r(varlist)' { 

        #delimit ;
        replace `column' = `column' / 1E3 if (regexm(quantity, "netincm") | 
                                              regexm(quantity, "sales") | 
                                              regexm(quantity, "mktval") | 
                                              regexm(quantity, "costbasis")) & 
                                              !regexm(quantity, "mktval_");
        #delimit cr
    }

    /***************************************************************************
        Prepare to make tables
    ***************************************************************************/

    /* LaTeX fragments are stored as string variables (mostly in row 1 only) so 
        that listtex can write them out one line at a time */
    gen begtab = "\begin{tabular}{@{\extracolsep{4pt}}lrrrrrrrrrr@{}}" in 1
    gen endtab  = "\end{tabular}" in 1

    gen toprule = "\toprule" in 1
    gen botrule = "\bottomrule" in 1

    gen hline = "\hline" in 1
    gen cline = "\cline{2-6} \cline{7-11}" in 1

    * Empty filler column used to pad rows out to the full column count
    gen c = "" in 1

    gen hspace2 = "\hspace{2mm}"
    gen hspace4 = "\hspace{4mm}"
    gen vspace2 = "\vspace{2mm}" in 1
    gen vspace4 = "\vspace{4mm}" in 1

    gen supertitle_wlth = "& \multicolumn{5}{c}{P99-99.9 wealth} & \multicolumn{5}{c}{Top 0.1\% wealth}" in 1
    gen supertitle_agi = "& \multicolumn{5}{c}{P99-99.9 AGI} & \multicolumn{5}{c}{Top 0.1\% AGI}" in 1
    gen tabtitle = "& Mean & Std. dev. & P50 & Min. & Max & Mean & Std. dev. & P50 & Min. & Max" in 1

    gen actbus1 = "\textit{Active Business \#1}" in 1
    gen actbus2 = "\textit{Active Business \#2}" in 1
    gen actbus3plus = "\textit{Active Businesses \#3 and Beyond}" in 1
    gen nonactbus = "\textit{Non-actively managed businesses}" in 1

    gen sales_miniheader = hspace2 + "Gross sales" in 1
    gen netincm_miniheader = hspace2 + "Net income (profits)" in 1
    gen mktval_miniheader = hspace2 + "Market value" in 1
    gen costbasis_miniheader = hspace2 + "Cost basis" in 1

    /* Row labels are keyed on quantity; the "alpaca" sentinel plus the assert 
        below guarantee that every quantity was given a label */
    #delimit ;
    gen rowlab = cond(quantity == "numbus", "Number of businesses owned",
                 cond(quantity == "numactbus", "Actively managed",
                 cond(quantity == "numnonactbus", "Non-actively managed", 
                 cond(quantity == "bus", "Share (\%) own any business", 
                 cond(quantity == "1plusactbus", "Share (\%) own 1+ actively-mgd bus.", 
                 cond(quantity == "2plusactbus", "Share (\%) own 2+ actively-mgd bus.", 
                 cond(quantity == "3plusactbus", "Share (\%) own 3+ actively-mgd bus.",
                 cond(quantity == "nonactbus", "Share (\%) own non-actively mgd bus.",
                 cond(regexm(quantity, "total") & !regexm(quantity, "actbus3"), "Total", 
                 cond(regexm(quantity, "share") & !regexm(quantity, "actbus3") & !regexm(quantity, "nonact"), "Respondents' share", 
                 cond(regexm(quantity, "emp"), "Total employment", 
                 cond(inlist(quantity, "actbus3plus_sharenetincm", "nonactsharesnetincm"), "Net income rcvd by respondents", 
                 cond(inlist(quantity, "actbus3plus_sharemktval", "nonactsharesmktval"), "Market value respondents' share", 
                 cond(inlist(quantity, "actbus3plus_sharecostbasis", "nonactsharescostbasis"), "Cost basis respondents' share", 
                 cond(regexm(quantity, "mktval_costbasis"), "\$ \frac{\text{Market value}}{\text{Cost basis}}\$",
                 cond(regexm(quantity, "mktval_netincm"), "\$ \frac{\text{Market value}}{\text{Profits}}\$",
                 cond(regexm(quantity, "mktval_sales"), "\$ \frac{\text{Market value}}{\text{Sales}}\$", "alpaca")))))))))))))))));

    assert rowlab != "alpaca";

    replace rowlab = cond(inlist(rowlab, "Total", "Respondents' share"), hspace4 + rowlab, 
                     cond(!inlist(quantity, "bus", "numbus"), hspace2 + rowlab, rowlab));
    #delimit cr

    order rowlab

    * Saved before the display formats are applied; scfbizmultiples imports this file
    outsheet using scfbiztable.csv, replace

    /***************************************************************************
        Output tables to .tex
    ***************************************************************************/

        /***********************************************************************
            Basic summary statistics table
        ***********************************************************************/

    format *_share *_mean *_sd *_p50 *_min *_max %12.0fc

    local tabname = "scf2016bizdetail_summstats"

    listtex begtab if _n == 1 using "`tabname'.tex", replace rstyle(none)
    listtex toprule if _n == 1, appendto("`tabname'.tex") rstyle(none)

    listtex supertitle_wlth if _n == 1, appendto("`tabname'.tex") rstyle(tabular)
    listtex cline if _n == 1, appendto("`tabname'.tex") rstyle(none)
    listtex tabtitle if _n == 1, appendto("`tabname'.tex") rstyle(tabular)
    listtex hline if _n == 1, appendto("`tabname'.tex") rstyle(none)

    listtex vspace2 if _n == 1, appendto("`tabname'.tex") rstyle(none)

    * Share rows carry one number per column group; c pads the other four cells
    listtex rowlab top1_01wlth_share c c c c top01wlth_share c c c c ///
        if quantity == "bus", appendto("`tabname'.tex") rstyle(tabular)

    foreach quantity in numbus numactbus numnonactbus {

        if "`quantity'" == "numnonactbus" {
            listtex vspace4 if _n == 1, appendto("`tabname'.tex") rstyle(none)
        }

        #delimit ;
        listtex rowlab top1_01wlth_mean top1_01wlth_sd top1_01wlth_p50 
            top1_01wlth_min top1_01wlth_max top01wlth_mean top01wlth_sd 
            top01wlth_p50 top01wlth_min top01wlth_max
            if quantity == "`quantity'", appendto("`tabname'.tex") rstyle(tabular);
        #delimit cr
    }

    forv biznum = 1 / 2 {

        listtex vspace2 if _n == 1, appendto("`tabname'.tex") rstyle(none)

        listtex actbus`biznum' c c c c c c c c c c if _n == 1, appendto("`tabname'.tex") rstyle(tabular)

        listtex vspace2 if _n == 1, appendto("`tabname'.tex") rstyle(none)

        listtex rowlab top1_01wlth_share c c c c top01wlth_share c c c c ///
            if quantity == "`biznum'plusactbus", appendto("`tabname'.tex") rstyle(tabular)

        foreach category in sales netincm mktval costbasis {

            listtex `category'_miniheader if _n == 1, appendto("`tabname'.tex") rstyle(tabular)

            foreach quantity in "total`category'" "share`category'" {

                #delimit ;
                listtex rowlab top1_01wlth_mean top1_01wlth_sd top1_01wlth_p50 
                    top1_01wlth_min top1_01wlth_max top01wlth_mean top01wlth_sd 
                    top01wlth_p50 top01wlth_min top01wlth_max 
                        if quantity == "actbus`biznum'`quantity'", appendto("`tabname'.tex") rstyle(tabular);
                #delimit cr
            }
        }

        listtex vspace4 if _n == 1, appendto("`tabname'.tex") rstyle(none)  

        #delimit ;
        listtex rowlab top1_01wlth_mean top1_01wlth_sd top1_01wlth_p50 
            top1_01wlth_min top1_01wlth_max top01wlth_mean top01wlth_sd 
            top01wlth_p50 top01wlth_min top01wlth_max 
                if quantity == "actbus`biznum'emp", appendto("`tabname'.tex") rstyle(tabular);
        #delimit cr
    }

    listtex vspace2 if _n == 1, appendto("`tabname'.tex") rstyle(none)

    * Use the full variable name so this does not depend on variable abbreviation
    listtex actbus3plus c c c c c c c c c c if _n == 1, appendto("`tabname'.tex") rstyle(tabular)

    listtex vspace2 if _n == 1, appendto("`tabname'.tex") rstyle(none)

    /* First column group is P99-99.9, so it must come from top1_01wlth_share 
        (as in the other share rows), not from the full top-1% top1wlth_share */
    listtex rowlab top1_01wlth_share c c c c top01wlth_share c c c c if quantity == "3plusactbus", ///
        appendto("`tabname'.tex") rstyle(tabular)

    foreach quantity in netincm mktval costbasis {

        if "`quantity'" == "costbasis" {
            listtex vspace4 if _n == 1, appendto("`tabname'.tex") rstyle(none)  
        }

        #delimit ;
        listtex rowlab top1_01wlth_mean top1_01wlth_sd top1_01wlth_p50 
            top1_01wlth_min top1_01wlth_max top01wlth_mean top01wlth_sd 
            top01wlth_p50 top01wlth_min top01wlth_max 
                if quantity == "actbus3plus_share`quantity'", 
            appendto("`tabname'.tex") rstyle(tabular);
        #delimit cr
    }

    listtex vspace2 if _n == 1, appendto("`tabname'.tex") rstyle(none)

    listtex nonactbus c c c c c c c c c c if _n == 1, appendto("`tabname'.tex") rstyle(tabular)

    listtex vspace2 if _n == 1, appendto("`tabname'.tex") rstyle(none)

    * Same column-group rule as above: P99-99.9 comes from top1_01wlth_share
    listtex rowlab top1_01wlth_share c c c c top01wlth_share c c c c if quantity == "nonactbus", ///
        appendto("`tabname'.tex") rstyle(tabular)

    foreach quantity in netincm mktval costbasis {

        #delimit ;
        listtex rowlab top1_01wlth_mean top1_01wlth_sd top1_01wlth_p50 
            top1_01wlth_min top1_01wlth_max top01wlth_mean top01wlth_sd 
            top01wlth_p50 top01wlth_min top01wlth_max 
            if quantity == "nonactshares`quantity'", 
        appendto("`tabname'.tex") rstyle(tabular);
        #delimit cr
    }

    listtex botrule if _n == 1, appendto("`tabname'.tex") rstyle(none)
    listtex endtab if _n == 1, appendto("`tabname'.tex") rstyle(none)

end/*%>*/

capture program drop scfbizmultiples/*%<*/
program define scfbizmultiples
/*******************************************************************************
    Appendix Table 3
    (M) Valuation multiples for SCF businesses in 2016 (data processed in 
        program for previous table)

    Reads:  scfbiztable.csv (written by scfbizsummstats, so that program must 
                have been run first)
    Writes: scf2016bizdetail_multiples.tex

    Every data row lists five statistics (mean, sd, p5, p50, p95) for the 
    top1_01wlth group followed by the same five for the top01wlth group, 
    matching the two column groups named in supertitle_wlth.
*******************************************************************************/

    import delimited using scfbiztable.csv, clear

    * This table reports P5/P50/P95 rather than P50/Min/Max, so replace the header
    replace tabtitle = "& Mean & Std. dev. & P5 & P50 & P95 & Mean & Std. dev. & P5 & P50 & P95" in 1

    format *_share *_mean *_sd *_p5 *_p50 *_p95 %12.1fc

    local tabname = "scf2016bizdetail_multiples"

    listtex begtab if _n == 1 using "`tabname'.tex", replace rstyle(none)
    listtex toprule if _n == 1, appendto("`tabname'.tex") rstyle(none)

    listtex supertitle_wlth if _n == 1, appendto("`tabname'.tex") rstyle(tabular)
    listtex cline if _n == 1, appendto("`tabname'.tex") rstyle(none)
    listtex tabtitle if _n == 1, appendto("`tabname'.tex") rstyle(tabular)
    listtex hline if _n == 1, appendto("`tabname'.tex") rstyle(none)

    listtex vspace2 if _n == 1, appendto("`tabname'.tex") rstyle(none)

    /* Active businesses 1 and 2: market value relative to sales, net income, 
        and cost basis */
    forv biznum = 1 / 2 {

        listtex vspace2 if _n == 1, appendto("`tabname'.tex") rstyle(none)

        listtex actbus`biznum' c c c c c c c c c c if _n == 1, appendto("`tabname'.tex") rstyle(tabular)

        foreach quantity in sales netincm costbasis {

            if "`quantity'" == "costbasis" {
                listtex vspace4 if _n == 1, appendto("`tabname'.tex") rstyle(none)  
            }

            #delimit ;
            listtex rowlab top1_01wlth_mean top1_01wlth_sd top1_01wlth_p5
                top1_01wlth_p50 top1_01wlth_p95 top01wlth_mean top01wlth_sd 
                top01wlth_p5 top01wlth_p50 top01wlth_p95 
                    if quantity == "actbus`biznum'mktval_`quantity'", 
                appendto("`tabname'.tex") rstyle(tabular);
            #delimit cr
        }
    }

    listtex vspace2 if _n == 1, appendto("`tabname'.tex") rstyle(none)

    * Use the full variable name so this does not depend on variable abbreviation
    listtex actbus3plus c c c c c c c c c c if _n == 1, appendto("`tabname'.tex") rstyle(tabular)

    /* Active businesses 3+. The column list follows the same order as the other 
        sections: five top1_01wlth statistics, then five top01wlth statistics. */
    foreach quantity in netincm costbasis {

        if "`quantity'" == "costbasis" {
            listtex vspace4 if _n == 1, appendto("`tabname'.tex") rstyle(none)  
        }

        #delimit ;
        listtex rowlab top1_01wlth_mean top1_01wlth_sd top1_01wlth_p5 
            top1_01wlth_p50 top1_01wlth_p95 top01wlth_mean 
            top01wlth_sd top01wlth_p5 top01wlth_p50 top01wlth_p95  
                if quantity == "actbus3p_mktval_`quantity'", 
            appendto("`tabname'.tex") rstyle(tabular);
        #delimit cr
    }

    listtex vspace2 if _n == 1, appendto("`tabname'.tex") rstyle(none)

    listtex nonactbus c c c c c c c c c c if _n == 1, appendto("`tabname'.tex") rstyle(tabular)

    * Non-actively managed businesses
    foreach quantity in netincm costbasis {

        if "`quantity'" == "costbasis" {
            listtex vspace2 if _n == 1, appendto("`tabname'.tex") rstyle(none)  
        }

        #delimit ;
        listtex rowlab top1_01wlth_mean top1_01wlth_sd top1_01wlth_p5 
            top1_01wlth_p50 top1_01wlth_p95 top01wlth_mean 
            top01wlth_sd top01wlth_p5 top01wlth_p50 top01wlth_p95
            if quantity == "nonactmktval_`quantity'", appendto("`tabname'.tex") rstyle(tabular);
        #delimit cr
    }

    listtex botrule if _n == 1, appendto("`tabname'.tex") rstyle(none)
    listtex endtab if _n == 1, appendto("`tabname'.tex") rstyle(none)
end/*%>*/

capture program drop compustatmultiples/*%<*/
program define compustatmultiples

    /***************************************************************************
        Appendix Table 4
        Compustat valuation multiples, for comparison with SCF and SDC:
            market value to sales, book equity, net capital, and pretax income.

        Loads the cstat_multiples analysis data, summarises the four 
        eqmultiple_* variables for fiscal year 2016, and writes the result to 
        compustat_multiples.tex.
    ***************************************************************************/

    load_analysis_data cstat_multiples

    #delimit ;
    tabstat eqmultiple_sale eqmultiple_bookeq eqmultiple_capital eqmultiple_pretax
        if fyear == 2016, statistics(mean sd p5 p25 p50 p75 p95) save;
    #delimit cr

    * Rows of m are the statistics in the order requested; columns are the variables
    matrix m = r(StatTotal)

    /***************************************************************************
        Rebuild the results as a four-row dataset, one row per multiple
    ***************************************************************************/

    clear
    set obs 4

    gen rowname = ""
    replace rowname = "\$\frac{\text{Market value}}{\text{Sales}}\$" in 1
    replace rowname = "\$\frac{\text{Market value}}{\text{Book Equity}}\$" in 2
    replace rowname = "\$\frac{\text{Market value}}{\text{Net Capital}}\$" in 3
    replace rowname = "\$\frac{\text{Market value}}{\text{Pretax Income}}\$" in 4

    * Observation k takes column k of the matrix row belonging to each statistic
    local statrow = 0
    foreach stat in mean sd p5 p25 p50 p75 p95 {
        local ++statrow
        gen `stat' = m[`statrow', _n]
    }

    /***************************************************************************
        LaTeX fragments, stored as string variables for listtex
    ***************************************************************************/

    gen begtab = "\begin{tabular}{lrrrrrrr}" in 1
    gen endtab  = "\end{tabular}" in 1

    gen toprule = "\toprule" in 1
    gen botrule = "\bottomrule" in 1

    gen hline = "\midrule" in 1
    gen cline = "\cline{2-8}" in 1

    gen c = "" in 1

    gen hspace2 = "\hspace{2mm}"
    gen hspace4 = "\hspace{4mm}"
    gen vspace2 = "\vspace{2mm}" in 1
    gen vspace4 = "\vspace{4mm}" in 1

    gen tabtitle = "& Mean & Std. dev. & P5 & P25 & P50 & P75 & P95" in 1

    format mean sd p5 p25 p50 p75 p95 %12.1fc

    /***************************************************************************
        Write the table
    ***************************************************************************/

    local tabname = "compustat_multiples"
    local texfile "`tabname'.tex"

    listtex begtab in 1 using "`texfile'", replace rstyle(none)
    listtex toprule in 1, appendto("`texfile'") rstyle(none)

    listtex vspace2 in 1, appendto("`texfile'") rstyle(none)
    listtex tabtitle in 1, appendto("`texfile'") rstyle(tabular)
    listtex hline in 1, appendto("`texfile'") rstyle(none)

    listtex vspace2 in 1, appendto("`texfile'") rstyle(none)

    forvalues rownum = 1/4 {

        * The spacer is written before every row except the fourth
        listtex vspace2 if _n == 1 & `rownum' != 4, appendto("`texfile'") rstyle(none)

        listtex rowname mean sd p5 p25 p50 p75 p95 in `rownum', ///
            appendto("`texfile'") rstyle(tabular)
    }

    listtex botrule in 1, appendto("`texfile'") rstyle(none)
    listtex endtab in 1, appendto("`texfile'") rstyle(none)

end/*%>*/

capture program drop wordcloud/*%<*/
program define wordcloud
    /***************************************************************************
        Appendix Table 6
        Reads the anonymized 2016 fund-name token file and writes wordcloud.tex,
        a table of token, number of funds, and the unweighted and weighted 
        rate mean and standard deviation (rates shown multiplied by 100).

        Rows are sorted by descending count. Rows 2 onward are written first, 
        with a rule after every 20th of them up to the 80th; the row with the 
        largest count is written last, below its own rule.
        NOTE(review): presumably that first row is an all-funds total -- confirm.
    ***************************************************************************/

    insheet using "$maincollapses/post_collapse_data/1065_fundrates_wordcloud_2016_anonymized.csv", comma clear

    gsort -count

    /***************************************************************************
        LaTeX fragments, stored as string variables for listtex
    ***************************************************************************/

    gen begtab = "\begin{tabular}{lccccc}" in 1
    gen endtab  = "\end{tabular}" in 1

    gen toprule = "\toprule" in 1
    gen botrule = "\bottomrule" in 1

    gen midrule = "\midrule" in 1

    gen c = "" in 1

    gen hspace2 = "\hspace{2mm}"
    gen hspace4 = "\hspace{4mm}"
    gen vspace2 = "\vspace{2mm}" in 1
    gen vspace4 = "\vspace{4mm}" in 1

    gen tabtitle = "Fund Name Token & Number of Funds & \multicolumn{2}{c}{Rate, Unweighted} & \multicolumn{2}{c}{Rate, Weighted}" in 1
    gen cmidrule = "\cmidrule(lr){3-4} \cmidrule(lr){5-6}" in 1
    gen tabsubtitle = "& & Mean & Std. Dev. & Mean & Std. Dev." in 1

    * Rescale every rate* variable by 100 for display
    foreach ratevar of varlist rate* {
        replace `ratevar' = 100 * `ratevar'
    }
    format rate* %12.2fc

    /***************************************************************************
        Write the table
    ***************************************************************************/

    local tabname = "wordcloud"
    local texfile "`tabname'.tex"

    listtex begtab in 1 using "`texfile'", replace rstyle(none)
    listtex toprule in 1, appendto("`texfile'") rstyle(none)

    listtex tabtitle in 1, appendto("`texfile'") rstyle(tabular)
    listtex cmidrule in 1, appendto("`texfile'") rstyle(none)
    listtex tabsubtitle in 1, appendto("`texfile'") rstyle(tabular)

    listtex midrule in 1, appendto("`texfile'") rstyle(none)

    local nrows = _N

    forvalues rownum = 2/`nrows' {

        listtex word count rate ratesd ratew ratewsd in `rownum', ///
            appendto("`texfile'") rstyle(tabular)

        * Number of rows written so far; add a rule after the 20th, 40th, 60th, and 80th
        local written = `rownum' - 1
        if mod(`written', 20) == 0 & inrange(`written', 20, 80) {
            listtex midrule in 1, appendto("`texfile'") rstyle(none)
        }
    }

    listtex midrule in 1, appendto("`texfile'") rstyle(none)

    * The largest-count row goes at the bottom of the table
    listtex word count rate ratesd ratew ratewsd in 1, appendto("`texfile'") rstyle(tabular)

    listtex botrule in 1, appendto("`texfile'") rstyle(none)
    listtex endtab in 1, appendto("`texfile'") rstyle(none)

end/*%>*/

capture program drop scfbizmultiples_top1detail/*%<*/
program define scfbizmultiples_top1detail
/*******************************************************************************
    Appendix Table 5
	(T1M) Top 1% business multiples, segmented by sales bin
*******************************************************************************/

    /***************************************************************************
        Collapse to yield desired statistics
    ***************************************************************************/ 

    use $dtadir/scf2016bizdetail.dta, clear

    summ actbus1totalsales, detail
    local maxsales = `r(max)'

    #delimit ;
    egen revenue_group = cut(actbus1totalsales) if top1wlth == 1 & numactbus >= 1, 
        at(0 1E6 1E7 5E7 1E8 `maxsales') icodes;
    #delimit cr

    assert !missing(revenue_group) if top1wlth == 1 & numactbus >= 1
    drop if top1wlth == 0 | numactbus == 0

    ds actbus1mktval_*_cnsr
    local multiples = "`r(varlist)'"

    foreach moment in "mean" "sd" "p50" "p5" "p95" { 

        local multiples_`moment' = "`multiples'"

        foreach multiple of varlist `multiples' {

            local renamestub = subinstr("`multiple'", "_cnsr", "", 1)

            local multiples_`moment' = subinstr("`multiples_`moment''", "`multiple'", ///
                                                "`moment'_`renamestub' = `multiple'", 1)
        }
    }

    gen mktval_wgtd = actbus1sharemktval * wgt

    #delimit ;
    collapse (mean) `multiples_mean' (sd) `multiples_sd' (median) `multiples_p50'
        (p5) `multiples_p5' (p95) `multiples_p95' (rawsum) mktval = mktval_wgtd 
        [aw = wgt], by(revenue_group);
    #delimit cr

    reshape long mean sd p5 p50 p95, i(revenue_group) j(multiple, string)

    replace multiple = subinstr(multiple, "_actbus1mktval_", "", 1)

    #delimit ;
    gen revenue_group_str = cond(revenue_group == 0, "_under1M", 
                            cond(revenue_group == 1, "_1Mto10M", 
                            cond(revenue_group == 2, "_10Mto50M", 
                            cond(revenue_group == 3, "_50Mto100M", 
                            cond(revenue_group == 4, "_100Mplus", "alpaca")))));
    #delimit cr
    assert revenue_group_str != "alpaca"
    drop revenue_group

    reshape wide mean sd p5 p50 p95 mktval, i(multiple) j(revenue_group_str, string)

    egen mktval_total = rowtotal(mktval_*)
    assert !missing(mktval_total)

    foreach salesgroup in 100Mplus 10Mto50M 1Mto10M 50Mto100M under1M {
        gen share_mktval_`salesgroup' = (mktval_`salesgroup' / mktval_total) * 100
        assert !missing(share_mktval_`salesgroup')
    }
    drop mktval_*

    /***************************************************************************
        Prepare to make table
    ***************************************************************************/ 

    gen begtab = "\begin{tabular}{lrrrrr}" in 1
    gen endtab  = "\end{tabular}" in 1

    gen toprule = "\toprule" in 1
    gen botrule = "\bottomrule" in 1

    gen hline = "\hline" in 1

    gen c = "" in 1

    gen hspace2 = "\hspace{2mm}"
    gen hspace4 = "\hspace{4mm}"
    gen vspace2 = "\vspace{2mm}" in 1
    gen vspace4 = "\vspace{4mm}" in 1

    gen tabtitle = "& Mean & Std. dev. & P5 & P50 & P95" in 1

    gen shares = hspace4 + "Share top 1\%-owned lgst actively-mngd bus. value" in 1

    gen salesunder1M = "\textit{Total sales under 1M}" in 1
    gen sales1Mto10M = "\textit{Total sales from 1M to 10M}" in 1
    gen sales10Mto50M = "\textit{Total sales from 10M to 50M}" in 1
    gen sales50Mto100M = "\textit{Total sales from 50M to 100M}" in 1
    gen sales100Mplus = "\textit{Total sales greater than 100M}" in 1

    #delimit ;
    gen rowlab = cond(multiple == "costbasis", "\$ \frac{\text{Market value}}{\text{Cost basis}}\$",
                 cond(multiple == "netincm", "\$ \frac{\text{Market value}}{\text{Profits}}\$",
                 cond(multiple == "sales", "\$ \frac{\text{Market value}}{\text{Sales}}\$", "alpaca")));
    #delimit cr
    assert rowlab != "alpaca"

    replace rowlab = hspace4 + rowlab

    order rowlab

    format share_* mean_* sd_* p5_* p50_* p95_* %12.2fc

    /***************************************************************************
         Output table to .tex
    ***************************************************************************/ 

    local tabname = "scf2016bizmultiples_top1wlthdetail"

    listtex begtab if _n == 1 using "`tabname'.tex", replace rstyle(none)
    listtex toprule if _n == 1, appendto("`tabname'.tex") rstyle(none)

    listtex tabtitle if _n == 1, appendto("`tabname'.tex") rstyle(tabular)
    listtex hline if _n == 1, appendto("`tabname'.tex") rstyle(none)

    listtex vspace2 if _n == 1, appendto("`tabname'.tex") rstyle(none)

    foreach salesgroup in under1M 1Mto10M 10Mto50M 50Mto100M 100Mplus {

        listtex vspace2 if _n == 1, appendto("`tabname'.tex") rstyle(none)

        listtex sales`salesgroup' c c c c c if _n == 1, appendto("`tabname'.tex") rstyle(tabular)

        listtex vspace2 if _n == 1, appendto("`tabname'.tex") rstyle(none)

        listtex shares share_mktval_`salesgroup' c c c c in 1, appendto("`tabname'.tex") rstyle(tabular)

        foreach multiple in sales netincm costbasis {

            if "`multiple'" == "costbasis" {
                listtex vspace4 if _n == 1, appendto("`tabname'.tex") rstyle(none)  
            }

            #delimit ;
            listtex rowlab mean_`salesgroup' sd_`salesgroup' p5_`salesgroup'
                p50_`salesgroup' p95_`salesgroup' if multiple == "`multiple'", 
                appendto("`tabname'.tex") rstyle(tabular);
            #delimit cr
        }
    }

    listtex botrule if _n == 1, appendto("`tabname'.tex") rstyle(none)
    listtex endtab if _n == 1, appendto("`tabname'.tex") rstyle(none)

end/*%>*/

capture program drop houwealth/*%<*/
program define houwealth
/*******************************************************************************
    Appendix Table 11
	(H) Total Housing Wealth under Alternative Property Tax Capitalization

    Reads the 2016 state-level collapse, keeps the "All" group, builds housing
    assets and housing wealth by state (totals and per-capita), and writes a
    side-by-side two-panel LaTeX tabular to houwealthbystate.tex.
    Takes no arguments; relies on the global $maincollapses and on listtex.
*******************************************************************************/

    /**************************************************************************
        Load in 2016 specific tax data collapses with geo information.
        NOTE (carried over from the original author): this collapse is very
            stale relative to the old one.
    **************************************************************************/
    
    insheet using "$maincollapses/post_collapse_data/state_collapse_hweal112_es_2016.csv", comma clear

    keep if group == "All"

    * Wealth adds the mortgage variables to assets
    * (presumably mortgages are stored as negative amounts -- TODO confirm)
    gen hou_assets = ownerhome_szz + rentalhome_ini 
    gen hou_wealth = hou_assets + ownermort_ini + rentalmort_ini

    foreach houvar of varlist hou_* {
        gen `houvar'_percap = (`houvar' / n) / 1E3 // Make per capita variable
        replace `houvar' = `houvar' / 1E9 // Scale down into billions
    }

    * NOTE(review): the rename below refers to stateabbrev, so `state' here is
    * presumably a variable abbreviation of stateabbrev -- confirm against the csv
    keep state hou_assets* hou_wealth*

    drop if state == "missing" | state == "territory"

    order state
    sort state

    /**************************************************************************
        Copy over half of rows into a second column:
            original rows 26-51 become the right-hand panel in rows 1-26
    **************************************************************************/

    assert _N == 51
    foreach copyvar of varlist * {
        * Row r of the right panel holds original row r + 25
        qui gen `copyvar'col2 = `copyvar'[_n + 25] in 1/26
    }

    rename (stateabbrev hou_assets hou_wealth hou_assets_percap hou_wealth_percap) ///
        (stateabbrevcol1 hou_assetscol1 hou_wealthcol1 hou_assets_percapcol1 ///
            hou_wealth_percapcol1)

    * The right panel has 26 entries, so exactly 26 printed rows are needed
    qui drop if _n > 26

    /**************************************************************************
        Clean up a bit and make table
    **************************************************************************/

    format hou_* %9.2f
    qui tostring hou_*, replace force usedisplayformat 

    * Original row 26 already appears as the first entry of the right panel;
    * blank it in the left panel so that no state is listed twice
    foreach leftvar of varlist *col1 {
        qui replace `leftvar' = "" in 26
    }

    qui gen tab = "\begin{tabular}{lrrrrclcrrrr}" in 1
    qui gen hline = "\hline" in 1
    qui gen cline = "\cline{2-5} \cline{8-11}" in 1
    qui gen c = " " in 1
    qui gen end = "\end{tabular}" in 1

    #delimit ;
    qui gen title1 = " & \multicolumn{4}{c}{2016 Housing Wealth} & & 
                      \multicolumn{4}{c}{2016 Housing Wealth \textit{(cont.)}}" in 1;

    qui gen title2 = "State & Assets (B) & Wealth (B) & Assets / pop (K) & 
                        Wealth / pop (K) & & State & Assets (B) & Wealth (B) &
                        Assets / pop (K) & Wealth / pop (K)" in 1;
    #delimit cr

    local tabname = "houwealthbystate"

    * Header
    qui listtex tab if _n == 1 using "`tabname'.tex", replace rstyle(none)
    qui listtex hline if _n == 1, appendto("`tabname'.tex") rstyle(none)
    qui listtex title1 if _n == 1, appendto("`tabname'.tex") rstyle(tabular)
    qui listtex cline if _n == 1, appendto("`tabname'.tex") rstyle(none)
    qui listtex title2 if _n == 1, appendto("`tabname'.tex") rstyle(tabular)
    qui listtex hline if _n==1 , appendto("`tabname'.tex") rstyle(none)        

    * Body: left panel, spacer column, right panel
    qui listtex *col1 c *col2 , appendto("`tabname'.tex") rstyle(tabular)        

    * Footer
    qui listtex hline if _n==1 , appendto("`tabname'.tex") rstyle(none)        
    qui listtex end if _n == 1, appendto("`tabname'.tex") rstyle(none)
end/*%>*/

capture program drop luigireturns/*%<*/
program define luigireturns
	*Appendix Table 2
    /***************************************************************************
        Builds luigi_returns.tex from the per-organizational-form returns files.
        Each raw file is read in two row windows: rows 2-28 are labelled
        "unweighted" and rows 29-56 "valueweighted". Within each window the
        selected statistics are transposed so that every statistic becomes a
        column, and the four resulting one-row-per-spec files are appended.
    ***************************************************************************/

    foreach spec in unweighted valueweighted {

        * Row window of the raw file for this specification; the first row
        * of the window supplies the variable names
        if "`spec'" == "unweighted" {
            local toprow = 2
            local endrow = 28
        }
        else {
            local toprow = 29
            local endrow = 56
        }

        foreach orgform in s p {

            import delimited using $maincollapses/post_collapse_data/returns_`orgform'_luigi_20220114.txt, ///
                rowrange(`toprow':`endrow') varnames(`toprow') clear

            isid pct
            keep if inlist(pct, "mean", "sd", "skew", "kurt", ".1", ".5", ".9")
            replace pct = cond(pct == ".1", "P10", cond(pct == ".5", "median", "P90")) if substr(pct, 1, 1) == "."

            * Remember each statistic's label by row so it can name the
            * matching column after transposition
            local nstats = _N 
            forv r = 1 / `nstats' {
                local statname`r' = pct[`r']
            }

            xpose, clear

            forv k = 1 / `nstats' {
                rename v`k' `statname`k''
                * Row 1 must be missing in every column before it is dropped
                assert missing(`statname`k'') in 1
            }
            drop in 1

            gen orgform = "`orgform'"
            gen whichspec = "`spec'"

            tempfile `orgform'_`spec'
            save ``orgform'_`spec''
        }
    }

    /*******************************************************************************
        Append data files together and clean up a bit
    *******************************************************************************/

    clear
    append using `s_unweighted' `p_unweighted' `s_valueweighted' `p_valueweighted'

    order orgform whichspec
    isid orgform whichspec

    gen rowlab = cond(whichspec == "valueweighted", "Value-weighted", proper(whichspec))

    * Columns labelled (\%) in the header are rescaled by 100
    foreach pctvar of varlist mean sd P10 median P90 {
        replace `pctvar' = 100 * `pctvar'
    }

    format mean sd skew kurt P10 median P90 %9.2fc

    /*******************************************************************************
        Set up for table output
    *******************************************************************************/

    gen begtab = "\begin{tabular}{lccccccc}" in 1
    gen endtab  = "\end{tabular}" in 1

    gen toprule = "\toprule" in 1
    gen midrule = "\midrule" in 1
    gen botrule = "\bottomrule" in 1

    gen hline = "\hline" in 1
    gen cline = "\cmidrule(lr){2-8}"
    gen hspace3 = "\hspace{4mm}"

    gen c = ""

    * Indent the row labels underneath their panel heading
    replace rowlab = hspace3 + rowlab

    qui gen header = " & Mean (\%) & Std. Dev. (\%) & Skewness & Kurtosis & P10 (\%) & P50 (\%) & P90 (\%)" in 1

    qui gen panelA = "\textit{S-corporations}" in 1
    qui gen panelB = "\textit{Partnerships}" in 1

    local tabname = "luigi_returns"

    qui listtex begtab if _n == 1 using "`tabname'.tex", replace rstyle(none)
    qui listtex toprule if _n == 1, appendto("`tabname'.tex") rstyle(none)

    qui listtex header if _n == 1, appendto("`tabname'.tex") rstyle(tabular)
    qui listtex cline if _n == 1, appendto("`tabname'.tex") rstyle(none)

    * One panel per organizational form: heading row, then that form's rows
    foreach panel in "panelA s" "panelB p" {
        local panelvar : word 1 of `panel'
        local form : word 2 of `panel'

        qui listtex `panelvar' c c c c c c c if _n == 1, appendto("`tabname'.tex") rstyle(tabular)

        qui listtex rowlab mean sd skew kurt P10 median P90 if orgform == "`form'", ///
            appendto("`tabname'.tex") rstyle(tabular)
    }

    qui listtex botrule if _n == 1, appendto("`tabname'.tex") rstyle(none)
    qui listtex endtab if _n == 1, appendto("`tabname'.tex") rstyle(none)

end/*%>*/

capture program drop industrytable/*%<*/
program define industrytable 
	*Appendix Table 1
    /***************************************************************************
        Writes industry_values.tex: the 30 four-digit NAICS industries with the
        largest combined value, followed by an aggregate row.
        Inputs: scorp_nc4_valuations_20220112.csv (industry names) and the
        2016 pass-through value and firm-count collapses under $maincollapses.
    ***************************************************************************/

    local nrows_shown = 30      // industries listed in the table
    local midrule_after = 15    // a \midrule is written after this rank

    * Industry names, one row per 4-digit NAICS code
    tempfile names
    insheet using "scorp_nc4_valuations_20220112.csv", comma clear
    keep naics_4d longname
    rename naics_4d nc4
    duplicates drop nc4, force
    save `names'

    tempfile vals
    insheet using "$maincollapses/post_collapse_data/passthrough_nc4_values_2016.csv", comma clear
    save `vals'
    
    tempfile firmcount
    insheet using "$maincollapses/post_collapse_data/passthrough_nc4_firmcount_2016.csv", comma clear
    save `firmcount'

    * Values must match a firm count; a missing name is tolerated
    use `vals', clear
    merge 1:1 nc4 using `firmcount', keep(3) nogen
    merge 1:1 nc4 using `names', keep(1 3) nogen

    * Combine across the matching columns (rowtotal treats missing as zero)
    egen val_combined = rowtotal(value_avg*)
    egen ordinc_combined = rowtotal(ord_inc*)
    egen firmcount_combined = rowtotal(firmcount*)
    egen ownercount_combined = rowtotal(ownercount*)

    * Rank industries by combined value, largest first
    gsort - val_combined
    gen rank = _n
    gen returns = 100 * (ordinc_combined / val_combined)
    gen val_per_firm = val_combined / firmcount_combined
    gen val_per_owner = val_combined / ownercount_combined

    * Totals over every industry in the data, not only the rows shown
    egen total_val = total(val_combined)
    egen total_ordinc = total(ordinc_combined)
    egen total_value_s = total(value_avg_ebitda_hybnof_reds)
    egen total_value_p = total(value_avg_ebitda_hybnof_redp)
    egen total_firmcount = total(firmcount_combined)
    egen total_ownercount = total(ownercount_combined)
    gen total_val_per_firm = total_val / total_firmcount
    gen total_val_per_owner = total_val / total_ownercount
    gen total_returns = 100 * total_ordinc / total_val

    tostring nc4, force replace

    gen industryname = longname + " (" + nc4 + ")"

    * Rescale for display: 1e-9 for the columns headed B$, 1e-6 for those
    * headed M$ (ratios above were computed before rescaling)
    foreach v of varlist val_combined value_avg* total_val total_value* {
        replace `v' = `v' * 1e-9
    }
    foreach v of varlist val_per* total_val_per* {
        replace `v' = `v' * 1e-6
    }

    * Fail before any output is written if the table cannot be filled
    if _N < `nrows_shown' {
        display as error "industrytable: need at least `nrows_shown' industries, found " _N
        exit 459
    }

    gen begtab = "\begin{tabular}{llrrrrrr}" in 1
    gen endtab  = "\end{tabular}" in 1

    gen toprule = "\toprule" in 1
    gen botrule = "\bottomrule" in 1

    gen midrule = "\midrule" in 1

    gen c = "" in 1

    * NOTE(review): the bare $ in "(B$)" and "(M$)" presumably reaches the
    * .tex file unescaped -- confirm the header compiles as intended
    gen tabtitle = "Rank & Industry (NAICS) & S $+$ P Value (B$) & Returns (\%) & Value/Firm (M$) & Value/Owner (M$) & S Value & P Value" in 1

    local tabname = "industry_values"

    listtex begtab if _n == 1 using "`tabname'.tex", replace rstyle(none)
    listtex toprule if _n == 1, appendto("`tabname'.tex") rstyle(none)

    listtex tabtitle if _n == 1, appendto("`tabname'.tex") rstyle(tabular)
    
    listtex midrule if _n == 1, appendto("`tabname'.tex") rstyle(none)

    format total* val* returns %12.1fc

    forv i = 1/`nrows_shown' {
        #delimit ;
        listtex rank industryname val_combined returns val_per_firm val_per_owner value_avg*s value_avg*p in `i', 
            appendto("`tabname'.tex") rstyle(tabular);
        #delimit cr
        if `i' == `midrule_after' {
            listtex midrule in 1, appendto("`tabname'.tex") rstyle(none)
        }
    }

    listtex midrule in 1, appendto("`tabname'.tex") rstyle(none)

    gen aggname = "Aggregate"

    #delimit ;
    listtex c aggname total_val total_returns total_val_per_firm total_val_per_owner total_value_s total_value_p in 1, 
        appendto("`tabname'.tex") rstyle(tabular);
    #delimit cr
    
    listtex botrule if _n == 1, appendto("`tabname'.tex") rstyle(none)
    listtex endtab if _n == 1, appendto("`tabname'.tex") rstyle(none)

end/*%>*/
/*%>*/

capture program drop graph_interest_participation/*%<*/
program define graph_interest_participation
	*Appendix Figures 24a and 24b
    /***************************************************************************
        For dividends and for capital gains, plots the share (in percent) of
        each wadjgross bin with a positive amount on each information-return
        type, using the 2016 rows of the div_info analysis data.
        One graph per income type is exported via graph_export.
    ***************************************************************************/
    load_analysis_data div_info
    keep if year == 2016

    * One row per bin is required for the connected lines below
    isid wadjgross

    foreach j in "dividend" "capgains" {
        foreach i in "1099pvt" "1099fund" "1099bro" "1065" "1120s" "1041" {
            gen share_`j'_`i' = 100 * (has_`j'_`i' / count)
        }

        * The 1099-B series is only built for capital gains; it is added as a
        * seventh plot, on the same x variable as every other series
        if ("`j'" == "capgains") {
            gen share_`j'_1099b = 100 * (has_`j'_1099b / count)
            local 1099b_line `"(connect share_`j'_1099b wadjgross, msize(small) ms(d) lp("-") lw(thin) mc("$u5") lc("$u5"))"'
            local 1099b_lab `"7 "1099-B""'
        }
        else {
            local 1099b_line ""
            local 1099b_lab ""
        }

        #delimit ;
        twoway 
            (connect share_`j'_1099bro wadjgross, msize(small) mc("$u1") lc("$u1") lw(thin) ms(o)) 
            (connect share_`j'_1099fund wadjgross, msize(small) mc("$p2") lc("$p2") lw(thin) ms(d))
            (connect share_`j'_1120s wadjgross, msize(small) mc("$f3") lc("$f3") lw(thin) ms(t)) 
            (connect share_`j'_1041 wadjgross, msize(small) mc("$u4") lc("$u4") lw(thin) ms(+) lp("-"))
            (connect share_`j'_1099pvt wadjgross, msize(small) mc("$f1") lc("$f1") lw(thin) ms(oh))
            (connect share_`j'_1065 wadjgross, msize(small) mc("$u3") lc("$u3") lw(thin) ms(s)) 
            `1099b_line'
            ,
            xtitle("AGI Percentile") ytitle("Share with Positive Income (%)")
            legend(order(5 "1099-DIV Private" 2 "1099-DIV Public" 1 "1099-DIV Broker" 
                         6 "1065-K1" 3 "1120S-K1" 4 "1041-K1" `1099b_lab') col(3)
                         symxsize(*.5)
                   region(lc(white))) $gpr xsize(5.5);
        #delimit cr
        graph_export "`j'_participation_adjgross_2016"
    }

end/*%>*/

capture program drop graph_interest_composition/*%<*/
program define graph_interest_composition 

*Appendix Figures 24c and 24d
    /***************************************************************************
        For dividends and for capital gains, plots how each wadjgross bin's
        income splits across information-return types, using the 2016 rows of
        the div_info analysis data. Shares are renormalized so that, within a
        bin, the plotted series of one income type sum to 100.
    ***************************************************************************/
    load_analysis_data div_info
    keep if year == 2016

    drop *_sec *_kid*
    collapse (sum) dividend_* txdivamt capgains_* adjgross, by(wadjgross)

    local divforms "1099pvt 1099fund 1099bro 1065 1120s 1041"
    local kgforms "`divforms' 1099b"

    * Raw shares. The names are collected into explicit lists because the kg
    * and kgAGI variables are created interleaved, so a first-last variable
    * range over either family would also pick up the other one.
    local div_shares ""
    foreach i of local divforms {
        gen incomecompo_dividend_`i' = 100 * (dividend_`i' / txdivamt)
        local div_shares `div_shares' incomecompo_dividend_`i'
    }
    local kg_shares ""
    local kgagi_shares ""
    foreach i of local kgforms {
        gen incomecompo_kg_`i' = 100 * (capgains_`i' / capgains_info)
        gen incomecompo_kgAGI_`i' = 100 * (capgains_`i' / capgains_agi)
        local kg_shares `kg_shares' incomecompo_kg_`i'
        local kgagi_shares `kgagi_shares' incomecompo_kgAGI_`i'
    }

    * Row totals of each family, used as the renormalization denominators
    egen incomecompo_dividend_info = rowtotal(`div_shares')
    egen incomecompo_kgAGI_info = rowtotal(`kgagi_shares')
    egen incomecompo_kg_info = rowtotal(`kg_shares')

    * Renormalized ("R") shares
    foreach i of local divforms {
        gen incomecompo_dividend_`i'R = 100 * incomecompo_dividend_`i'/incomecompo_dividend_info
    }
    foreach i of local kgforms {
        gen incomecompo_kg_`i'R = 100 * incomecompo_kg_`i'/incomecompo_kg_info
        gen incomecompo_kgAGI_`i'R = 100 * incomecompo_kgAGI_`i'/incomecompo_kgAGI_info
    }

    /***********************************************************************
        Upper part of the AGI distribution only: the graphs keep
            wadjgross >= 75 (the output file names carry a p90 tag)
    ***********************************************************************/

    *** Drew fixed the basis in 1099B data for A.10.D. The do files need to be merged into final
    * production files. They are in two dofiles (outside and inside) in DBOX/stata/raw/inside_collapses/20220127

    foreach j in "dividend" "kg" {
        * The 1099-B series exists only for capital gains; it is added as a
        * seventh plot, on the same x variable as every other series
        if ("`j'" == "kg") {
            local 1099b_line `"(connect incomecompo_`j'_1099bR wadjgross, msize(small) ms(d) lp("-") lw(thin) mc("$u5") lc("$u5"))"'
            local 1099b_lab `"7 "1099-B""'
        }
        else {
            local 1099b_line ""
            local 1099b_lab ""
        }

        #delimit ;
        twoway 
            (connect incomecompo_`j'_1099broR wadjgross, msize(small) mc("$u1") lc("$u1") lw(thin) ms(o)) 
            (connect incomecompo_`j'_1099fundR wadjgross, msize(small) mc("$p2") lc("$p2") lw(thin) ms(d))
            (connect incomecompo_`j'_1120sR wadjgross, msize(small) mc("$f3") lc("$f3") lw(thin) ms(t)) 
            (connect incomecompo_`j'_1041R wadjgross, msize(small) mc("$u4") lc("$u4") lw(thin) ms(+) lp("-"))
            (connect incomecompo_`j'_1099pvtR wadjgross, msize(small) mc("$f1") lc("$f1") lw(thin) ms(oh))
            (connect incomecompo_`j'_1065R wadjgross, msize(small) mc("$u3") lc("$u3") lw(thin) ms(s)) 
            `1099b_line'
                if wadjgross >= 75,
            ysca(range(-2.5 102.5)) ylab(0(10)100)
            xsca(range(74.5 100.5)) xlab(75(5)100)
            xtitle("AGI Percentile") ytitle("Share of Bin's Income (%)")
            legend(order(5 "1099-DIV Private" 2 "1099-DIV Public" 1 "1099-DIV Broker" 
                         6 "1065-K1" 3 "1120S-K1" 4 "1041-K1" `1099b_lab') col(3)
                         symxsize(*.5)
                   region(lc(white))) $gpr xsize(5.5);
        #delimit cr
        graph export "`j'_incomecompo_p90_adjgross_2016.pdf", replace
    }

end/*%>*/

capture program drop graph_comparison_trends/*%<*/
program define graph_comparison_trends

    tempfile imputation

    /***************************************************************************
        Appendix Figure 9 & Figure 25c
		Effect of Refinement
    ***************************************************************************/

    /***********************************************************************
        Pull in collapsed tax data and rename/generate SZ and SZZ
            measures for housing wealth, pass-through wealth, and pension
            wealth. Drop variables which are not important for comparison.
    ***********************************************************************/
    load_analysis_data szz
    keep if inlist(group, "All", "P99.9-100")
    replace group = cond(group == "All", "_all", "_top01")

    gen taxbond_pref = cond(year < 2001, ///
                               taxbond_cmd_3tier, ///
                               taxbond_info)

    gen taxbond_eqmuf = taxbond + taxbond_mufmisc_sz

    keep year group hweal20 hweal_preferred taxbond taxbond_ini taxbond_ust10 taxbond_baa taxbond_aaa ///
        taxbond_pref taxbond_cmd_3tier taxbond_cmd_ub taxbond_cmd_lb ///
        taxbond_equal taxbond_eqmuf

    rename taxbond_cmd_3tier taxbond_cmd3

    reshape wide hweal20 hweal_preferred taxbond taxbond_ini taxbond_ust10 taxbond_baa taxbond_aaa ///
        taxbond_pref taxbond_cmd3 taxbond_cmd_ub taxbond_cmd_lb ///
        taxbond_equal taxbond_eqmuf, i(year) j(group, string)

    /***********************************************************************
        Compute shares of net household wealth attributable to fixed
            income and c-corp equity wealth categories among top 0.1% under
            different capitalization assumptions
    ***********************************************************************/

    foreach taxbond in taxbond taxbond_ini taxbond_pref taxbond_baa taxbond_aaa taxbond_ust10 ///
        taxbond_cmd3 taxbond_cmd_ub taxbond_cmd_lb taxbond_equal taxbond_eqmuf {
        gen top01_`taxbond'_sh_aggwlth = `taxbond'_top01 / hweal_preferred_all * 100
    }

    replace top01_taxbond_ini_sh_aggwlth = taxbond_ini_top01 / hweal20_all * 100

    keep year top01_*_sh_aggwlth
    save `imputation'


    /***************************************************************************
       Consequences of Assumptions
    ***************************************************************************/
    
    /***********************************************************************
        Pull in collapsed tax data and rename/generate SZ and SZZ
            measures for housing wealth, pass-through wealth, and pension
            wealth. Drop variables which are not important for comparison.
    ***********************************************************************/
    load_analysis_data szz
    keep if inlist(group, "All", "P99.9-100")

    keep year group hweal20 hweal_preferred taxbond_muf ccorw_0kg ccorw_25kg ccorw_9010 ccorw ccorw_ini
    rename hweal_preferred hweal

    gen divw_equal = ccorw 
    gen divw_pref = ccorw_9010 
    gen divw_0kg = ccorw_0kg 
    gen divw_25kg = ccorw_25kg 

    replace group = cond(group == "All", "_all", "_top01")
    reshape wide hweal hweal20 taxbond_muf ccorw_0kg ccorw_25kg ccorw_9010 ccorw ccorw_ini divw*, i(year) j(group, string)

    /***********************************************************************
        Compute shares of net household wealth attributable to fixed
            income and c-corp equity wealth categories among top 0.1% under
            different capitalization assumptions
    ***********************************************************************/

    foreach ccorw in divw_equal divw_pref divw_0kg divw_25kg ccorw_0kg ccorw_25kg ccorw_9010 ccorw ccorw_ini {
        gen top01_`ccorw'_sh_aggwlth = `ccorw'_top01 / hweal_all * 100
    }

    replace top01_ccorw_ini_sh_aggwlth = ccorw_ini_top01 / hweal20_all * 100

    preserve
    merge 1:1 year using `imputation', nogen
    save `imputation', replace
    restore

    /************************************************************************
        Make graphs
    ************************************************************************/

    #delimit ;
    twoway 
           (connect top01_divw_equal_sh_aggwlth year, lc("$u3") mc("$u3") ms(s)) 
           (connect top01_divw_pref_sh_aggwlth year, ms(O) lcolor("$u1") mcolor("$u1")
                lwidth(medthick))  
           (connect top01_divw_25kg_sh_aggwlth year,  ms(oh) lp("-") mc("$u1") lc("$u1"))
           (connect top01_divw_0kg_sh_aggwlth year,  ms(th) lp("-") mc("$u1") lc("$u1"))
           , 
        ytitle("Top 0.1% C-corporation Wealth as" "Share of Net Household Wealth (%)")  
        xtitle(" ") 
        xlab(1965(5)2015, labsize(small)) xsca(range(1964 2016))
        ylab(1(1)8) ysca(range(0.5 7.5))
        legend(region(lcolor(white) margin(tiny)) row(2) symxsize(*.6)
            order(1 "Equal Returns" 2 "Baseline" 
                  3 "0.75 Divs Weight" 4 "Divs Only"))  
        $gpr;
    #delimit cr
    graph export "effect_refinement_ccorw.pdf", replace

    /***************************************************************************
        (Appendix) Consequences of Assumptions for p-thru, housing, pensions
    ***************************************************************************/
    
    /***********************************************************************
        Pull in collapsed tax data and rename/generate SZ and SZZ
            measures. Drop variables which are not important for comparison.
    ***********************************************************************/
    /************************************************************************
        Pass-through
    ************************************************************************/
    load_analysis_data szz
    keep if inlist(group, "All", "P99.9-100")

    keep year group hweal20 hweal_preferred *_value_avg_ebitda_red scorw scorw_ini ///
        partw partw_ini solepropw solepropw_ini *_value_avg_ebitda_nof_red *_value_avg_ebitda_full_red ///
        missing* *_value_avg_ebitda_hybnof_red scorw_szzhybnof_scaled partw_szzhybnof_sz20_scaled solepropw_szz_sz20_scaled ///
        scorw_szz_scaled partw_szz_sz20_scaled partw_sz20_scaled solepropw_sz20_scaled
    rename hweal_preferred hweal
    rename *_value_avg_ebitda_hybnof_red *_pref
    rename *_value_avg_ebitda_nof_red *_nof
    rename *_value_avg_ebitda_full_red *_full
    rename *_value_avg_ebitda_red *_nohybnof
    * newer stuff
    rename partw_szzhybnof_sz20_scaled p_prefSc
    rename scorw_szzhybnof_scaled s_prefSc
    rename solepropw_szz_sz20_scaled so_szzSc
    rename partw_szz_sz20_scaled p_szzSc
    rename scorw_szz_scaled s_szzSc
    rename solepropw_sz20_scaled so_sz20Sc
    rename partw_sz20_scaled p_sz20Sc

    replace group = cond(group == "All", "_all", "_top01")
    reshape wide hweal hweal20 *_pref *_nof *_full *_nohybnof *_prefSc *_szzSc *_sz20Sc ///
        scor* part* sole* missing*, i(year) j(group, string)

    /***********************************************************************
        Compute shares of net household among top 0.1% under
            different capitalization assumptions
    ***********************************************************************/

    foreach pthru in s_pref p_pref s_nof p_nof s_full p_full ///
        missing_scorp missing_pship ///
        scorw scorw_ini partw partw_ini solepropw solepropw_ini ///
        s_nohybnof p_nohybnof s_prefSc p_prefSc s_szzSc p_szzSc so_szzSc ///
        p_sz20Sc so_sz20Sc {
        gen top01_`pthru'_sh_aggwlth = `pthru'_top01 / hweal_all * 100
    }
    * PSZ 18
    gen top01_pthru_ini_sh_aggwlth = (solepropw_ini_top01 + scorw_ini_top01 + partw_ini_top01) / hweal20_all * 100

    * Preferred/Baseline
    gen top01_pthru_sh_aggwlth = top01_so_szzSc_sh_aggwlth + top01_s_prefSc_sh_aggwlth + top01_p_prefSc_sh_aggwlth
    gen top01_pthruM_sh_aggwlth = top01_pthru_sh_aggwlth + top01_missing_scorp_sh_aggwlth + top01_missing_pship_sh_aggwlth

    * Equal returns (2020 aggs)
    gen top01_pthru_equ_sh_aggwlth = top01_so_sz20Sc_sh_aggwlth + top01_scorw_sh_aggwlth + top01_p_sz20Sc_sh_aggwlth
    gen top01_pthru_equM_sh_aggwlth = top01_pthru_equ_sh_aggwlth + top01_missing_scorp_sh_aggwlth + top01_missing_pship_sh_aggwlth

    foreach x in "nof" "full" "pref" "nohybnof" {
        gen top01_pthru_`x'_sh_aggwlth = top01_solepropw_sh_aggwlth + top01_s_`x'_sh_aggwlth + top01_p_`x'_sh_aggwlth
        gen top01_pthru_`x'M_sh_aggwlth = top01_pthru_`x'_sh_aggwlth + top01_missing_scorp_sh_aggwlth + top01_missing_pship_sh_aggwlth
    }
    
    gen top01_pthru_base_sh_aggwlth = cond(year > 2000, top01_pthru_sh_aggwlth, top01_pthru_equ_sh_aggwlth)

    preserve
    merge 1:1 year using `imputation', nogen
    save `imputation', replace
    restore

    /************************************************************************
        Make graphs
    ************************************************************************/

    #delimit ;
    twoway 
           (connect top01_pthru_equ_sh_aggwlth year, lc("$u3") mc("$u3") ms(s)) 
           (connect top01_pthru_equM_sh_aggwlth year, lc("$u3") mc("$u3") ms(dh) lp(-)) 
           (connect top01_pthru_base_sh_aggwlth year if year >= 2001, ms(O) lcolor("$u1") mcolor("$u1")
                lwidth(medthick))  
           (connect top01_pthru_pref_sh_aggwlth year, lc("$u1") mc("$u1") ms(th) lp(-)) 
           (connect top01_pthru_full_sh_aggwlth year, lc("$u1") mc("$u1") ms(oh) lp(-)) 
           (connect top01_pthru_nof_sh_aggwlth year, lc("$u1") mc("$u1") ms(sh) lp(-)) 
           , 
        ytitle("Top 0.1% Pass-Through Wealth as" "Share of Net Household Wealth (%)")  
        xtitle(" ") 
        xlab(1965(5)2015, labsize(small)) xsca(range(1964 2016))
        ylab(0(1)5) ysca(range(0 5))
        legend(region(lcolor(white) margin(tiny)) row(3) symxsize(*.6)
            order(1 "Equal Returns" 
                  3 "Baseline" 
                  2 "Equal incl. Missing" 
                  4 "Info Returns" 
                  5 "Info w/o Labor, Fin Adj" 
                  6 "Info w/o Hybrid Adj"
                  ))  
        $gpr;
    #delimit cr
    graph export "effect_refinement_pthru.pdf", replace

    /************************************************************************
        Housing
    ************************************************************************/
    load_analysis_data szz
    keep if inlist(group, "All", "P99.9-100")

    keep year group hweal20 hweal_preferred ownerhome* rental* ownermort*
    rename hweal_preferred hweal

    replace group = cond(group == "All", "_all", "_top01")
    reshape wide hweal hweal20 ownerhome* rental* ownermort*, i(year) j(group, string)

    /***********************************************************************
        Compute shares of net household among top 0.1% under
            different capitalization assumptions
    ***********************************************************************/

    foreach hou in ownerhome_szz ownerhome_ini ownerhome rentalhome rentalhome_ini ///
                rentalmort rentalmort_ini ownermort ownermort_ini {
        gen top01_`hou'_sh_aggwlth = `hou'_top01 / hweal_all * 100
    }

    gen top01_othhou_equ_sh_aggwlth = top01_rentalhome_sh_aggwlth + top01_ownermort_sh_aggwlth + top01_rentalmort_sh_aggwlth
    gen top01_othhou_ini_sh_aggwlth = top01_rentalhome_ini_sh_aggwlth + top01_ownermort_ini_sh_aggwlth + top01_rentalmort_ini_sh_aggwlth

    gen top01_hwhou_equ_sh_aggwlth = top01_ownerhome_sh_aggwlth + top01_othhou_equ_sh_aggwlth
    replace top01_hwhou_equ_sh_aggwlth = cond(year < 1980, ///
        top01_ownerhome_ini_sh_aggwlth + top01_rentalhome_sh_aggwlth + top01_ownermort_ini_sh_aggwlth + top01_rentalmort_sh_aggwlth, ///
        top01_hwhou_equ_sh_aggwlth)

    gen top01_hwhou_pref_sh_aggwlth = top01_ownerhome_szz_sh_aggwlth + top01_othhou_equ_sh_aggwlth 
    replace top01_hwhou_pref_sh_aggwlth = cond(year < 1980, ///
        top01_ownerhome_ini_sh_aggwlth + top01_rentalhome_sh_aggwlth + top01_ownermort_ini_sh_aggwlth + top01_rentalmort_sh_aggwlth, ///
        top01_hwhou_pref_sh_aggwlth)

    gen top01_hwhou_ini_sh_aggwlth = top01_ownerhome_ini_sh_aggwlth + top01_othhou_ini_sh_aggwlth
    replace top01_hwhou_ini_sh_aggwlth = top01_hwhou_ini_sh_aggwlth * (hweal_all / hweal20_all)

    preserve
    merge 1:1 year using `imputation', nogen
    save `imputation', replace
    restore

    /************************************************************************
        Make graphs
    ************************************************************************/

    #delimit ;
    twoway 
           (connect top01_hwhou_equ_sh_aggwlth year, lc("$u3") mc("$u3") ms(s)) 
           (connect top01_hwhou_pref_sh_aggwlth year if year > 1979, ms(O) lcolor("$u1") mcolor("$u1")
                lwidth(medthick))  
           , 
        ytitle("Top 0.1% Housing Wealth as" "Share of Net Household Wealth (%)")  
        xtitle(" ") 
        xlab(1965(5)2015, labsize(small)) xsca(range(1964 2016))
        ylab(0(1)5) ysca(range(0 5))
        legend(region(lcolor(white) margin(tiny)) row(1) symxsize(*.6)
            order(1 "Equal Returns" 2 "Baseline"))  
        $gpr;
    #delimit cr
    graph export "effect_refinement_hwhou.pdf", replace

    /************************************************************************
        Pensions
    ************************************************************************/
    tempfile fig2
    insheet using aggwealth.csv, comma clear
    gen hweal_scale = (hweal_preferred - 1e6 * ttpen + ttpen_preferred) / hweal_preferred
    gen pen_scale = ttpen_preferred / (1e6 * ttpen)
    keep year hweal_scale pen_scale
    save `fig2'

    load_analysis_data szz
    keep if inlist(group, "All", "P99.9-100")

    keep year group hweal_preferred hweal20 hwpen hwpen_ini penw_szz_scaled ///
        szz_penw_pre1980 szz_penw_pre1980_v8 penw_szz_scaled_v8
    rename hweal_preferred hweal

    replace group = cond(group == "All", "_all", "_top01")
    reshape wide hweal hweal20 hwpen hwpen_ini penw_szz_scaled ///
        szz_penw_pre1980 szz_penw_pre1980_v8 penw_szz_scaled_v8, i(year) j(group, string)

    /***********************************************************************
        Compute shares of net household among top 0.1% under
            different capitalization assumptions
    ***********************************************************************/

    rename penw_szz_scaled_* penw_szz_*
    rename szz_penw_pre1980_* penw_szz80_*
    foreach pen in  hwpen hwpen_ini penw_szz penw_szz80 penw_szz_v8 penw_szz80_v8 {
        gen top01_`pen'_sh_aggwlth = `pen'_top01 / hweal_all * 100
    }

    gen top01_hwpen_equ_sh_aggwlth = top01_hwpen_ini_sh_aggwlth

    replace top01_hwpen_ini_sh_aggwlth = top01_hwpen_ini_sh_aggwlth * (hweal_all / hweal20_all)

    gen top01_hwpen_base_sh_aggwlth = cond(year >= 1980, ///
                top01_penw_szz_v8_sh_aggwlth, top01_penw_szz80_v8_sh_aggwlth)

    gen top01_hwpen_pref_sh_aggwlth = cond(year >= 1980, ///
                top01_penw_szz_sh_aggwlth, top01_penw_szz80_sh_aggwlth)

    merge 1:1 year using `fig2', nogen
    replace top01_hwpen_pref_sh_aggwlth = top01_hwpen_ini_sh_aggwlth / hweal_scale

    keep if year >= 1966 & year <= 2016

    preserve
    merge 1:1 year using `imputation', nogen
    save `imputation', replace
    restore


    /************************************************************************
        Make graphs
    ************************************************************************/

    #delimit ;
    twoway (connect top01_hwpen_equ_sh_aggwlth year, lc("$u3") mc("$u3") ms(s)) 
           (connect top01_hwpen_base_sh_aggwlth year, ms(O) lcolor("$u1") mcolor("$u1")
                lwidth(medthick))  
           , 
        ytitle("Top 0.1% Pension Wealth as" "Share of Net Household Wealth (%)")  
        xtitle(" ") 
        xlab(1965(5)2015, labsize(small)) xsca(range(1964 2016))
        ylab(0(1)5) ysca(range(0 5))
        legend(region(lcolor(white) margin(tiny)) row(1) symxsize(*.6)
            order(1 "Equal Returns" 2 "Baseline"))  
        $gpr;
    #delimit cr
    graph export "effect_refinement_hwpen.pdf", replace

    /************************************************************************
        Other
    ************************************************************************/
    load_analysis_data szz
    keep if inlist(group, "All", "P99.9-100")

    gen taxbond_pref = cond(year < 2001, ///
                               taxbond_cmd_3tier, ///
                               taxbond_info)
                               
    gen taxbond_base = taxbond_pref
    gen taxbond_equ = taxbond 

    gen divw_pref = ccorw_9010 + taxbond_muf 
    gen divw_equal = ccorw + taxbond_muf 
    gen divw_base = divw_equal
    gen divw_equ = ccorw

    gen pthru_equal = pthru_equ
    gen pthru_equal_mis = pthru_equ + missing_scorp + missing_pship
    gen pthru_ini = solepropw_ini + scorw_ini + partw_ini
    gen hwhou_equal = hwhou_equ
    gen hwhou_ini = ownerhome_ini + rentalhome_ini + ownermort_ini + rentalmort_ini 

    #delimit ;
    keep year group hweal_preferred hweal20 
        /* Taxbond */
        taxbond_pref taxbond taxbond_ini taxbond_equ taxbond_base
        /* Divw */
        divw_pref divw_equal ccorw_ini divw_base divw_equ
        /* Passthrough */
        pthru_pref pthru_equal pthru_equal_mis pthru_equ pthru_ini pthru_base
        /* Housing */
        hwhou_pref hwhou_equal hwhou_ini hwhou_base hwhou_equ
        /* Pension */
        hwpen_pref hwpen hwpen_ini hwpen_base hwpen_equ;
    #delimit cr
    rename hweal_preferred hweal

    gen oth_base = hweal - taxbond_base - divw_base - pthru_base - hwhou_base - hwpen_base
    gen oth_pref = hweal - taxbond_pref - divw_pref - pthru_pref - hwhou_pref - hwpen_pref
    gen oth_equal = hweal - taxbond - divw_equal - pthru_equal - hwhou_equal - hwpen
    gen oth_equal_mis = hweal - taxbond - divw_equal - pthru_equal_mis - hwhou_equal - hwpen
    gen oth_ini = hweal20 - taxbond_ini - ccorw_ini - pthru_ini - hwhou_ini - hwpen_ini
    gen oth_equ = hweal - taxbond_equ - divw_equ - pthru_equ - hwhou_equ - hwpen_equ
    keep year group oth_* hweal hweal20

    replace group = cond(group == "All", "_all", "_top01")
    reshape wide hweal hweal20 oth_base oth_pref oth_equal oth_equal_mis oth_ini oth_equ, i(year) j(group, string)

    /***********************************************************************
        Compute shares of net household among top 0.1% under
            different capitalization assumptions
    ***********************************************************************/

    foreach oth in oth_base oth_pref oth_equal oth_equal_mis oth_ini oth_equ {
        gen top01_`oth'_sh_aggwlth = `oth'_top01 / hweal_all * 100
    }

    replace top01_oth_ini_sh_aggwlth = top01_oth_ini_sh_aggwlth * (hweal_all / hweal20_all)

    preserve
    merge 1:1 year using `imputation', nogen
    save `imputation', replace
    restore

    /************************************************************************
        Make graphs
    ************************************************************************/

    #delimit ;
    twoway 
           (connect top01_oth_equ_sh_aggwlth year, lc("$u3") mc("$u3") ms(s)) 
           (connect top01_oth_base_sh_aggwlth year, ms(O) lcolor("$u1") mcolor("$u1")
                lwidth(medthick))  
           , 
        ytitle("Top 0.1% Other Wealth as" "Share of Net Household Wealth (%)")  
        xtitle(" ") 
        xlab(1965(5)2015, labsize(small)) xsca(range(1964 2016))
        ylab(-1(1)5) ysca(range(-1 4))
        legend(region(lcolor(white) margin(tiny)) row(1) symxsize(*.6)
            order(1 "Equal Returns" 2 "Baseline"))  
        $gpr;
    #delimit cr
    graph export "effect_refinement_hwoth.pdf", replace

    /************************************************************************
        Imputation interval
    ************************************************************************/
    use `imputation', clear
    save imputation_preferred.dta, replace

end/*%>*/
*******************************************************************************

capture program drop graph_hetero_fixed/*%<*/
program define graph_hetero_fixed

    /***************************************************************************
        Appendix Figure 4
		Average Rates of Return in Capitalized Data

        Takes no arguments. For the year window start_year-2016 (currently
        the single year 2016) it computes, for each wealth group, the return
        on taxable-interest assets under several rankings, merges them by
        group number, and exports a bar chart of r_pref and r_cmd to
        avgfixreturn2016_appendix.pdf in the current directory.
    ***************************************************************************/

    /***********************************************************************
        Prep Alexi Savov deposit rates in e-mail received by EZ on 
            10/15/2019 with subject line "Re: Aggregate Deposits Data." 
            Collapse by mean over the retained years and store the result
            in local macro r_deposit.
        NOTE(review): r_deposit, r_ust10 and r_aaa are not referenced again
            in this program -- confirm whether they are still needed.
    ***********************************************************************/
    local start_year 2016

    load_analysis_data deposit_rates
    keep if inrange(year, `start_year', 2016)
    
    collapse (mean) r_deposit // Then get average over years
    sum r_deposit
    local r_deposit = `r(mean)'

    /***********************************************************************
        Get averages of 10-year Treasury and Aaa yields over the same
            start_year-2016 window; store in local macros
    ***********************************************************************/

    load_analysis_data fred_rates
    keep if inrange(year, `start_year', 2016)

    collapse (mean) r_ust10 r_aaa

    sum r_ust10
    local r_ust10 = `r(mean)'

    sum r_aaa
    local r_aaa = `r(mean)'

    /***********************************************************************
        Prep tax data ranked by non-interest wealth and preferred wealth

        In each of the three files below the numeric group codes
        1, 7, 8, 9, 10, 5 are kept and recoded to groupnum 1-6
        (1 stays 1, 7-10 become 2-5, 5 becomes 6). The merges further
        down are 1:1 on groupnum, so each file must hold exactly one row
        per group, i.e. a single year.
    ***********************************************************************/

    load_analysis_data szz_niw
    keep if inrange(year, `start_year', 2016)

    keep if inlist(w${niw_defn_late}_group, 1, 7, 8, 9, 10, 5)
    assert inlist(group, "All", "P0-90", "P90-99", "P99-99.9", "P99.9-99.99", "P99.99-100")

    * Return in percent: scaled interest income over taxable-bond wealth
    gen r_niw = (interest_info_scaled / taxbond_info) * 100

    gen groupnum = cond(w${niw_defn_late}_group != 1, w${niw_defn_late}_group - 5, ///
                                                      w${niw_defn_late}_group)

    * Code 5 would otherwise map to 0; place it last
    replace groupnum = 6 if w${niw_defn_late}_group == 5

    keep groupnum r_niw

    tempfile inside_niw
    save `inside_niw'

    load_analysis_data szz
    keep if inrange(year, `start_year', 2016)

    keep if inlist(w${preferred_defn_late}_group, 1, 7, 8, 9, 10, 5)
    assert inlist(group, "All", "P0-90", "P90-99", "P99-99.9", "P99.9-99.99", "P99.99-100")

    gen r_pref = (interest_info_scaled / taxbond_info) * 100

    gen groupnum = cond(w${preferred_defn_late}_group != 1, w${preferred_defn_late}_group - 5, ///
                                                      w${preferred_defn_late}_group)

    * TODO: remove when niw file is fixed
    replace groupnum = 6 if w${preferred_defn_late}_group == 5


    keep groupnum r_pref

    tempfile inside_pref
    save `inside_pref'

    load_analysis_data szz_cmd
    keep if inrange(year, `start_year', 2016)

    keep if inlist(w${cmd_defn_late}_group, 1, 7, 8, 9, 10, 5)
    assert inlist(group, "All", "P0-90", "P90-99", "P99-99.9", "P99.9-99.99", "P99.99-100")

    * CMD return uses fiint + intest over the 3-tier taxable-bond wealth
    gen r_cmd = ((fiint + intest) / taxbond_cmd_3tier) * 100

    gen groupnum = cond(w${cmd_defn_late}_group != 1, w${cmd_defn_late}_group - 5, ///
                                                      w${cmd_defn_late}_group)

    replace groupnum = 6 if w${cmd_defn_late}_group == 5

    keep groupnum r_cmd

    tempfile inside_cmd
    save `inside_cmd'

    /***********************************************************************
        Prep tax data ranked by AGI
    ***********************************************************************/
    load_analysis_data szz_agi
    keep if inrange(year, `start_year', 2016)

    keep year group interest_info_scaled taxbond_info

    * Groups are mutually exclusive, collectively exhaustive
    levelsof group, local(groups) clean
    assert "`groups'" == "P0-90 P90-99 P99-99.9 P99.9-99.99 P99.99-100"    

    /*******************************************************************
        Calculate total fixed income and fixed claims
    *******************************************************************/

    sort year
    foreach sumqty of varlist interest_info_scaled taxbond_info {
        egen total_`sumqty' = total(`sumqty')
    }

    /*******************************************************************
        Put totals in a dedicated row instead of keeping
            them in a column
    *******************************************************************/

    * Duplicate the P0-90 row; the first copy is relabelled "All" below
    expand 2 if group == "P0-90" 
    
    sort year group
    by year: assert group[1] == "P0-90" & group[2] == "P0-90"
    by year: replace group = "All" if _n == 1
    by year: replace interest_info_scaled = total_interest_info_scaled if group == "All"
    by year: replace taxbond_info = total_taxbond_info if group == "All"

    * encode numbers the labels in alphabetical order, which yields
    * All = 1, P0-90 = 2, ..., P99.99-100 = 6, matching groupnum above
    encode group, gen(groupnum)

    isid groupnum

    gen r_agi = (interest_info_scaled / taxbond_info) * 100

    keep groupnum r_agi

    tempfile inside_agi
    save `inside_agi'

    /***********************************************************************
        Merge the four group-level return files together on groupnum
            (every group must be present in every file); then plot the
            preferred and CMD returns
    ***********************************************************************/

    use `inside_pref', clear
    merge 1:1 groupnum using `inside_niw', assert(3) nogen
    merge 1:1 groupnum using `inside_cmd', assert(3) nogen
    merge 1:1 groupnum using `inside_agi', assert(3) nogen

    * The merged data holds groupnum only (no variable called group), so
    * the category variable is named in full rather than left to Stata's
    * variable-abbreviation matching.
    #delimit ;
    graph bar (asis) r_pref r_cmd 
        ,
        $gpr
        bargap(20)
        bar(2, color("$p21"))
        bar(1, color("$u7"))
        legend(order(1 "Baseline Wealth" 2 "CMD 3-Tier" ) 
            region(lcolor(white) margin(tiny)) row(1)) 
        over(groupnum, relabel(1 "All" 2 "P0-90" 3 "P90-99" 4 "P99-99.9" 5 "P99.9-99.99" 
                            6 "P99.99-100")) 
        ylab(0(1)4) yscale(range(0 4))
        ytitle("Return on Taxable-Interest Assets (%)")
        xsize(6.5);
    #delimit cr
    graph export "avgfixreturn2016_appendix.pdf", replace

end/*%>*/

capture program drop graph_imputation/*%<*/
program define graph_imputation
	
	*Appendix Figure 37
    * Plots three top-0.1% share series (top01_sz16, top01_sz20, top01_pref)
    * against year from 1966 onward and passes the figure to graph_export
    * under the name "revisionists_comparison". Takes no arguments.
	
    * Read the SZ 2020 series and rescale it by 100
    * (presumably fraction -> percent, to match the % axis -- confirm).
    tempfile sz2020
    insheet using sz2020_tu_top01.csv, comma clear
    replace top01_sz20 = 100 * top01_sz20
    save `sz2020'

    * SZ 2020 appendix graph
    * NOTE(review): the local macros szztu, fig1 and fig1a are not defined
    * anywhere in this program. Stata local macros are private to the
    * program that creates them, so as written these expand to nothing and
    * the merges below cannot run. Confirm where these files should come
    * from (e.g. saved .dta files or tempfiles created in this program).
    use `sz2020', clear
    merge 1:1 year using `szztu', keep(1 3) nogen
    merge 1:1 year using `fig1', keep(1 3) nogen
    * top01_preferred is dropped before the next merge
    * (presumably so the fig1a file supplies the series that is plotted --
    * confirm).
    drop top01_preferred
    merge 1:1 year using `fig1a', keep(1 3) nogen

    keep if year >= 1966

    #delimit ;
    twoway (connect top01_sz16 year, ms(s) lc("$u3") mc("$u3")) 
            (connect top01_sz20 year, ms(sh) lc("$f3") mc("$f3") lpattern(shortdash)) 
            (connect top01_pref year, ms(o) color("$u1") lw(medthick)), 
            legend(col(3) region(lcolor(white) margin(tiny)) 
                order(1 "SZ 2016" 
                      2 "SZ 2020" 
                      3 "Baseline" 
                      )) 
            $gpr xlab(1965(5)2015) xtitle(" ") 
            yscale(range(4 22)) ylab(5(2.5)22.5)
            ytitle("Share of Total Household Wealth (%)") xsize(6);
    #delimit cr
    * NOTE(review): graph_export (with an underscore) is a user-written
    * command, not the built-in "graph export" used by the other figure
    * programs in this part of the file; confirm it is defined and which
    * file extension/location it writes to.
    graph_export "revisionists_comparison"

end/*%>*/


capture program drop main
program define main

    *************************************************************
    * Entry point: creates the draft_final output directory under
    * the outputdir global, starts the log, changes into that
    * directory, runs every appendix figure and table program
    * listed below in order, then returns to the localcodedir
    * global and closes the log. Takes no arguments.
    *************************************************************

    *************************************************************
    * Set up
    *************************************************************
    * Built-in mkdir instead of shelling out; capture makes an
    * already-existing directory a no-op. A directory that truly
    * could not be created is caught by the cd below.
    capture mkdir "$outputdir/draft_final"
    log_start, f("draft-final")
    local outdir $outputdir/draft_final
    cd "`outdir'"
    * graphdir/tabledir are only used by the (currently disabled)
    * copy steps in the clean-up section
    local graphdir "path/to/graphs"
    local tabledir "path/to/tables"
    *************************************************************
    * Sections
    *************************************************************
    set trace on
    set tracedepth 3

    * Creates some environment variables for preferred specs
    set_env_ds

    **************************************************************************** 
    * Appendix Figures
    * NOTE(review): graph_hetero_fixed and graph_imputation are defined in
    * this file but are not invoked here -- confirm that is intended.
    **************************************************************************** 
    aggwealth_extended
    graph_aggregate_fiscal_wealth
    graph_fiscal_shares
    sz16fig4Breplication
    topsharegrowthdecomp

    graph_soca_portfolio_equity
    graph_optimal_alpha
    graph_forbes_adjustment

    graph_pension_age
    graph_socialsecurity

    graph_usa_cross
    graph_states_overtime

    graph_ultrarich_top01

    public_corp_share
    carried_interest
    persistence
    pension_aggs
    wealthconcentration_tu 
    social_security_aggs
    topportfoliototals
    scfadj
    validate_housing
    top01wealthmortsensitivity
    estate_top01shares
    interestrate_pships
    scf_scaleprivbiz
    scf_scaletaxbond
    scf_stderrors
    scf_stderrors_bus
    scf_stderrors_ratio
    graph_scf_stderrors_int
    estate_stderrors_int
    pship_int_facts

    **************************************************************************** 
    * Appendix Tables
    **************************************************************************** 
    cmd_parameters_table
    calibrated_moments_table
    scfbizsummstats
    scfbizmultiples
    scfbizmultiples_top1detail
    compustatmultiples
    houwealth
    wordcloud
    luigireturns
    industrytable


    *************************************************************
    * Clean up
    *************************************************************
    *graph_copy, draftdir(`graphdir')
    *table_copy, draftdir(`tabledir')

    * Tracing was switched on above; switch it off so it does not
    * stay active in the session after main returns
    set trace off

    * Quoted so a code directory containing spaces still resolves
    cd "$localcodedir"
    log close

end
